import pandas as pd
import numpy as np
import scipy.stats as spicy
# Load the penguins dataset directly from its raw GitHub URL.
PENGUINS_URL = "https://raw.githubusercontent.com/roualdes/data/refs/heads/master/penguins.csv"
df = pd.read_csv(PENGUINS_URL)
# Preview the first rows to check that the columns were parsed as expected.
df.head()
species island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g sex year
0 Adelie Torgersen 39.1 18.7 181.0 3750.0 male 2007
1 Adelie Torgersen 39.5 17.4 186.0 3800.0 female 2007
2 Adelie Torgersen 40.3 18.0 195.0 3250.0 female 2007
3 Adelie Torgersen NaN NaN NaN NaN NaN 2007
4 Adelie Torgersen 36.7 19.3 193.0 3450.0 female 2007
# 95% t-based confidence interval for the mean of bill_length_mm.
# Drop missing measurements once so the mean, standard deviation and count
# are all computed from exactly the same set of observations.
bill_length = df["bill_length_mm"].dropna()
N = bill_length.count()  # number of non-missing observations
xbar = bill_length.mean()
# Use the sample standard deviation (ddof=1). The t distribution with N - 1
# degrees of freedom assumes the unbiased variance estimate; the ddof=0
# default of np.std would make the interval slightly too narrow.
s = bill_length.std(ddof=1)
# Lower and upper critical values; by symmetry these are -/+ ppf(0.975).
t = spicy.t(df=N - 1).ppf([0.025, 0.975])
xbar + t * s / np.sqrt(N)
array([43.34209692, 44.50176273])
# (rows, columns) of the full data frame, including rows with missing values.
df.shape
(344, 8)
# Display N, the number of non-missing bill_length_mm values, so it can be
# compared with the total row count of the data frame.
N
342