Linear Least Squares (Key)¶

In class we derived the formula for linear least squares in one variable. In this notebook you will learn a bit of the numerical library numpy, use numpy to compute a linear regression, and then compute it yourself using the formulas from class.

Begin by running the cell below. Then go back and carefully read through all the code. There is a lot of new stuff here. Note how to create numpy arrays/matrices and how to compute a linear least squares regression.

In [1]:
import numpy as np
import matplotlib.pyplot as plt

# Step 1: Prepare your data
# x: Independent variable (input)
# y: Dependent variable (output)
x = np.array([1, 2, 3, 4, 5])
y = np.array([2, 4, 5, 4, 5])

# Step 2: Perform linear regression using the least squares method

# Add a column of ones to the input data for the intercept (bias term)
X = np.vstack([x, np.ones(len(x))]).T

# Calculate the slope (a) and intercept (b)
a, b = np.linalg.lstsq(X, y, rcond=None)[0]

print(f"Slope (a): {a:.4f}")
print(f"Intercept (b): {b:.4f}")

# Step 3: Predict y values using the regression line
y_pred = a * x + b

# Optional: Plot the data and the regression line
plt.scatter(x, y, color='blue', label='Data points')
plt.plot(x, y_pred, color='red', label='Regression line')
plt.xlabel('x')
plt.ylabel('y')
plt.legend()
plt.show()
Slope (a): 0.6000
Intercept (b): 2.2000
[Figure: scatter of the data points (blue) with the fitted regression line (red)]
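
As an aside, numpy also offers np.polyfit, which fits a degree-1 polynomial and returns the same slope and intercept; a minimal sketch (not used in the rest of this notebook):

# Equivalent fit with np.polyfit: for degree 1 the coefficients come back
# highest degree first, i.e. (slope, intercept)
slope, intercept = np.polyfit(x, y, 1)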

An aside about numpy matrices¶

What happened to x? Here's the original $x$, which is an array:

In [2]:
x
Out[2]:
array([1, 2, 3, 4, 5])

We stack a row of 1s below it and take the transpose to get the input matrix $X$:

In [3]:
X
Out[3]:
array([[1., 1.],
       [2., 1.],
       [3., 1.],
       [4., 1.],
       [5., 1.]])

Breaking this down into pieces, first let's make a Python list that contains $x$ and an array of ones:

In [4]:
[x,np.ones(len(x))]
Out[4]:
[array([1, 2, 3, 4, 5]), array([1., 1., 1., 1., 1.])]

Now let's use numpy to make a vertical stack. The first element in the list becomes the first row:

In [5]:
np.vstack([x, np.ones(len(x))])
Out[5]:
array([[1., 2., 3., 4., 5.],
       [1., 1., 1., 1., 1.]])

And now take the transpose:

In [6]:
np.vstack([x, np.ones(len(x))]).T
Out[6]:
array([[1., 1.],
       [2., 1.],
       [3., 1.],
       [4., 1.],
       [5., 1.]])
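
An equivalent one-step construction (not used above) is np.column_stack, which stacks 1-D arrays as columns; a minimal sketch:

# Same result as np.vstack([x, np.ones(len(x))]).T
np.column_stack([x, np.ones(len(x))])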

Practice with matrices¶

Make a numpy matrix that is a row of 5 zeros followed by a row of 5 ones, then 5 zeros, then 5 ones again. Use built-in functions and vstack (don't just type out a bunch of 0s and 1s; can you guess the name of the function that makes an array of zeros?).

In [7]:
np.vstack([np.zeros(5), np.ones(5), np.zeros(5), np.ones(5)])
Out[7]:
array([[0., 0., 0., 0., 0.],
       [1., 1., 1., 1., 1.],
       [0., 0., 0., 0., 0.],
       [1., 1., 1., 1., 1.]])

Now make a similar matrix that is a row of all 1s followed by all 2s in the second row, then 3s, then 4s. Again use the built-in function np.ones. Name this matrix M. Hint: $[2,2,2,2,2] = 2\cdot[1,1,1,1,1]$

In [8]:
a = np.ones(5)
M = np.vstack([a, 2*a, 3*a, 4*a])

Compute $M$ times $M$ transpose and $M$ transpose times $M$ ($MM^T$ and $M^TM$). In numpy, the product $AB$ can be computed with A @ B for matrices A and B.

In [9]:
M @ M.T
Out[9]:
array([[ 5., 10., 15., 20.],
       [10., 20., 30., 40.],
       [15., 30., 45., 60.],
       [20., 40., 60., 80.]])
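
The other product, $M^TM$, is a $5\times 5$ matrix whose every entry is $1^2+2^2+3^2+4^2 = 30$; a quick check:

# M is 4x5, so M.T @ M is 5x5; each entry is 1^2 + 2^2 + 3^2 + 4^2 = 30
M.T @ M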

Linear Least Squares Regression¶

You can create a vector of normally distributed samples with mean $\mu$ and standard deviation $\sigma$ by using the numpy function np.random.normal(mu, sigma, n). Try creating a vector with 10 random samples, with a mean of 100 and a standard deviation of 5.

In [10]:
np.random.normal(100,5,10)
Out[10]:
array([100.93573996, 100.20696146, 100.33860864, 101.54845134,
        97.07334677, 101.96880446, 103.85349105, 102.62606973,
       104.37269392,  95.25899349])
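
If you want reproducible samples across runs, you can seed numpy's random generator first; a minimal sketch (the seed value 42 is arbitrary):

# Seeding makes the pseudo-random draws repeatable across runs
np.random.seed(42)
np.random.normal(100, 5, 10)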

Now create some data for linear regression. Make a vector $x$ of ints over the range $[0,9]$ and let $y$ be a linear function of $x$, $y = 3x+2+\epsilon(x)$ where $\epsilon(x)$ is a random Gaussian noise function $\epsilon(x) \sim N(0,1)$. Make a scatter plot of $y$ vs. $x$ and label it.

In [11]:
x = np.array(range(10))
y = 3*x+2 + np.random.normal(0,1,10)
plt.scatter(x,y);
plt.title("Linear function $y \\approx 3x+2$");
[Figure: scatter plot of the noisy data, titled "Linear function $y \approx 3x+2$"]

Compute the correct linear regression coefficients using numpy as above. Check that they are reasonable.

In [12]:
# Add a column of ones to the input data for the intercept (bias term)
X = np.vstack([x, np.ones(len(x))]).T

# Calculate the slope (a0) and intercept (b0)
a0, b0 = np.linalg.lstsq(X, y, rcond=None)[0]

print(f"Slope (a): {a0}")
print(f"Intercept (b): {b0}")
Slope (a): 3.1815959167949623
Intercept (b): 1.6929138583475791

Now compute the regression coefficients using the formulas from class. Begin by defining some very helpful variables: Sx and Sy will be $\sum_i {x_i}$ and $\sum_i {y_i}$ respectively. Next, Sxx and Syy are the sums of squares $\sum_i {x_i}^2$ and $\sum_i {y_i}^2$. Finally, the inner product Sxy $= \sum_i x_iy_i$. The quickest way to do this involves comprehensions and the sum function, but you can use loops for now if you need to.

In [13]:
Sx = sum(x)
Sy = sum(y)
Sxx = sum([i**2 for i in x])
Syy = sum([i**2 for i in y])
Sxy = sum([i*j for i,j in zip(x,y)])
In [14]:
Sx, Sy, Sxx, Syy, Sxy
Out[14]:
(45, 160.1009548392492, 285, 3403.5519567262904, 982.9359599122058)
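
As an aside, these sums can also be computed with vectorized numpy operations rather than comprehensions; a minimal sketch of equivalent calls:

# Vectorized equivalents of the comprehensions above
Sx  = np.sum(x)
Sy  = np.sum(y)
Sxx = np.sum(x**2)
Syy = np.sum(y**2)
Sxy = np.dot(x, y)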

Finally determine $a,b$ as in class. Display the absolute errors between your calculations and the ones numpy returned. (They should be close to machine precision, which is $10^{-15}$, give or take.)
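
For reference, these are the closed-form one-variable least squares formulas that the code below implements:

$$a = \frac{nS_{xy} - S_xS_y}{nS_{xx} - S_x^2}, \qquad b = \frac{S_{xx}S_y - S_xS_{xy}}{nS_{xx} - S_x^2}$$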

In [15]:
n = len(x)
denom = n*Sxx - Sx*Sx
a = (n*Sxy - Sx*Sy) / denom
b = (Sxx*Sy - Sx*Sxy) / denom
In [16]:
a,b
Out[16]:
(3.1815959167949615, 1.6929138583475882)
In [17]:
a-a0, b-b0 
Out[17]:
(-8.881784197001252e-16, 9.103828801926284e-15)

Application¶

Write a function linear_least_squares(x,y) which takes input vectors x,y and returns a,b as above. (Did you know Python functions can return two values? Here's an example.)

In [18]:
def two_numbers():
    a = 1
    b = 10
    return a,b
In [19]:
A, B = two_numbers()
print(A,B)
1 10
In [20]:
def linear_least_squares(x,y):
    Sx = sum(x)
    Sy = sum(y)
    Sxx = sum([i**2 for i in x])
    Syy = sum([i**2 for i in y])
    Sxy = sum([i*j for i,j in zip(x,y)])
    n = len(x)
    denom = n*Sxx - Sx*Sx
    a = (n*Sxy - Sx*Sy) / denom
    b = (Sxx*Sy - Sx*Sxy) / denom
    return a,b
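
As a quick sanity check (a sketch, reusing the x, y, a0, b0 from above), the function should reproduce the lstsq coefficients up to machine precision:

# Differences should be on the order of 1e-15
a_check, b_check = linear_least_squares(x, y)
print(a_check - a0, b_check - b0)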

Application¶

Now, using $a=5, b=-15$, run linear least squares 100 times on 100 vector pairs $(x,y)$, where the $x$ vector is the same each time but each $y$ gets its own draw of Gaussian noise. Plot the resulting best-fit lines all on the same graph.

  • Use np.arange to make your input vector $x$ cover the domain $[-5,5]$ with a step-size of 0.01
  • Create arrays to store all the computed a and b values (you'll use this later)
  • If you call plt.plot() in a loop, it will keep adding to the same plot
  • Give your plot a title!
In [21]:
all_a = []
all_b = []
x = np.arange(-5, 5, 0.01)   # same x for every trial
for i in range(100):
    y = 5*x - 15 + np.random.normal(0, 10, len(x))   # fresh noise each trial
    a, b = linear_least_squares(x, y)
    y_pred = a*x + b
    all_a += [a]
    all_b += [b]
    plt.plot(x, y_pred)
plt.title("100 least-squares fits to $y = 5x - 15$ plus noise");
[Figure: 100 best-fit lines plotted on the same axes]

Determine the average of the $a$s and $b$s returned above. Compare these to the true $a, b$. Explain your result. (There is an np.mean function.)

In [22]:
np.mean(all_a), np.mean(all_b)
Out[22]:
(5.005095981078317, -15.002493519769928)
In [23]:
plt.hist(all_a);
[Figure: histogram of the 100 estimated slopes all_a]