STAT 207: Newton's Method¶
Newton’s method is better known and more widely applied than EM and MM.
We focus on the multidimensional version of Newton's method.
The core idea: locally approximate the objective function by a strictly concave quadratic (for maximization) and maximize that surrogate in closed form.
Notations¶
Consider the problem of maximizing the log-likelihood function $L(\theta)$, $\theta \in \Theta \subset \mathbb{R}^p$.
- Gradient (or score) of $L$:
$$ \nabla L(\theta) = \begin{pmatrix} \frac{\partial L(\theta)}{\partial\theta_1} \\ \vdots \\ \frac{\partial L(\theta)}{\partial\theta_p} \end{pmatrix} $$
- Hessian of $L$: $$ \nabla^2 L(\theta) = \begin{pmatrix} \frac{\partial^2 L(\theta)}{\partial\theta_1^2} & \cdots & \frac{\partial^2 L(\theta)}{\partial\theta_1\partial\theta_p} \\ \vdots & \ddots & \vdots \\ \frac{\partial^2 L(\theta)}{\partial\theta_p\partial\theta_1} & \cdots & \frac{\partial^2 L(\theta)}{\partial\theta_p^2} \end{pmatrix} $$
- Observed information matrix (negative Hessian): $$ -\nabla^2 L(\theta) $$
- Expected (Fisher) information matrix: $$ \mathbb{E}[-\nabla^2 L(\theta)] $$ (a quick numerical check of these quantities follows below).
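As a quick numerical check of these definitions (an illustration, not from the notes), the sketch below evaluates the score and observed information of a Poisson log-likelihood with log link, $L(\theta) = \sum_i [y_i x_i^T\theta - e^{x_i^T\theta}]$ (constants dropped), and compares the analytic score with finite differences. The seed, dimensions, and true parameter are arbitrary choices.

import numpy as np

# Illustrative example: Poisson log-likelihood with log link (constants dropped)
rng = np.random.default_rng(0)
n, p = 50, 3
X = rng.standard_normal((n, p))
theta_true = np.array([0.5, -0.3, 0.2])
y = rng.poisson(np.exp(X @ theta_true))

def loglik(theta):
    eta = X @ theta
    return np.sum(y * eta - np.exp(eta))

def score(theta):                              # gradient: X'(y - mu)
    return X.T @ (y - np.exp(X @ theta))

def obs_info(theta):                           # observed information: X' diag(mu) X
    mu = np.exp(X @ theta)
    return X.T @ (mu[:, None] * X)

# Central finite differences should match the analytic score
theta = np.zeros(p)
eps = 1e-6
fd = np.array([(loglik(theta + eps * np.eye(p)[j]) - loglik(theta - eps * np.eye(p)[j])) / (2 * eps)
               for j in range(p)])
print(np.allclose(fd, score(theta), atol=1e-4))    # should print True

For this model the observed and expected information coincide, since $-\nabla^2 L$ does not involve $y$.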
Newton's Method¶
Newton's method was originally developed for finding roots of nonlinear equations $f(x) = 0$ (NAS Chapter 5)
It is considered the gold standard for its fast (quadratic) convergence: $$ \frac{\|\theta^{(t+1)} - \theta^*\|}{\|\theta^{(t)} - \theta^*\|^2} \to \text{constant}. $$
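A tiny root-finding illustration of this rate (not part of the original notes): for $f(x) = x^2 - 2$, whose root is $\sqrt{2}$, the error of the Newton iterates roughly squares at each step, so the number of correct digits roughly doubles.

import math

x = 2.0                                    # starting point
for t in range(5):
    x = x - (x**2 - 2) / (2 * x)           # Newton step: x - f(x)/f'(x)
    print(t + 1, abs(x - math.sqrt(2)))    # error roughly squares each iteration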
Taylor expansion around the current iterate $\theta^{(t)}$: $$ L(\theta) \approx L(\theta^{(t)}) + \nabla L(\theta^{(t)})^\top (\theta - \theta^{(t)}) + \frac{1}{2}(\theta - \theta^{(t)})^\top \nabla^2 L(\theta^{(t)}) (\theta - \theta^{(t)}) $$
To maximize the quadratic function, we equate its gradient to zero $$ \nabla L(\theta^{(t)}) + \nabla^2 L(\theta^{(t)}) (\theta - \theta^{(t)}) = \mathbf{0}_p $$ and the next iterate is $$ \begin{aligned} \theta^{(t+1)} =& \theta^{(t)} - \left[ \nabla^2 L(\theta^{(t)})\right]^{-1}\nabla L(\theta^{(t)}) \\ =& \theta^{(t)} + \left[ -\nabla^2 L(\theta^{(t)})\right]^{-1}\nabla L(\theta^{(t)}) \end{aligned} $$ We call this the naive Newton's method.
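A minimal sketch of the naive Newton iteration for maximizing $L$, assuming user-supplied callables `grad` and `hess` for $\nabla L$ and $\nabla^2 L$ (both are placeholders, not defined in the notes):

import numpy as np

def newton_naive(grad, hess, theta, maxiter=100, tol=1e-8):
    # Naive Newton's method for maximization:
    #   theta <- theta - [hess(theta)]^{-1} grad(theta)
    for _ in range(maxiter):
        g = grad(theta)
        if np.linalg.norm(g, np.inf) < tol:               # stop at a stationary point
            break
        theta = theta - np.linalg.solve(hess(theta), g)   # solve the linear system, don't invert
    return theta

Note that the linear system is solved directly rather than forming the matrix inverse explicitly.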
Any stationary point of $L(\theta)$ is a fixed point of Newton’s method.
Two potential problems:
- It is computationally expensive to evaluate and invert the observed information matrix.
- Newton's method is not guaranteed to be an ascent algorithm, i.e., it may fail to satisfy $L(\theta^{(t+1)}) > L(\theta^{(t)})$.

Solutions:
- Approximate $-\nabla^2 L(\theta^{(t)})$ by a positive definite matrix $A^{(t)}$.
- Line search.
To see why this yields an ascent algorithm, let $\Delta\theta^{(t)} = [A^{(t)}]^{-1} \nabla L(\theta^{(t)})$ and apply a first-order Taylor expansion of the log-likelihood $L(\theta)$:
$$ \begin{aligned} & L(\theta^{(t)} + s \Delta\theta^{(t)}) - L(\theta^{(t)})\\ = & s \cdot \nabla L(\theta^{(t)})^T \Delta\theta^{(t)} + o(s)\\ = & s \cdot \nabla L(\theta^{(t)})^T [A^{(t)}]^{-1} \nabla L(\theta^{(t)}) + o(s). \end{aligned} $$ For $s$ sufficiently small, the right-hand side is strictly positive when $A^{(t)}$ is PD and $\nabla L(\theta^{(t)}) \ne \mathbf{0}_p$. The quantity $\nabla L(\theta^{(t)})^T[A^{(t)}]^{-1} \nabla L(\theta^{(t)})$ is the (squared) Newton decrement.
A practical Newton-type algorithm iterates according to $$ \theta^{(t+1)} = \theta^{(t)} + s[A^{(t)}]^{-1} \nabla L(\theta^{(t)}) = \theta^{(t)} + s\Delta\theta^{(t)}, $$ where $A^{(t)}$ is a PD approximation to $-\nabla^2 L(\theta^{(t)})$ and $s$ is the step length.
If $L$ is strictly concave, $-\nabla^2 L(\theta^{(t)})$ is PD.
Line search strategies: step-halving (backtracking with $s = 1, 1/2, 1/4, \ldots$), golden section search, cubic interpolation, Armijo rule, Wolfe conditions, ... Note the Newton direction $$ \Delta\theta^{(t)} = [A^{(t)}]^{-1} \nabla L(\theta^{(t)}) $$ is calculated only once in each iteration. The cost of choosing $s$ is minimal.
The best line search strategy is problem dependent: a more careful search costs extra objective evaluations per iteration but may cut the number of iterations, so the choice trades per-iteration cost against convergence speed and reliability.
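A minimal sketch of such a practical Newton-type iteration with step-halving, assuming user-supplied callables `loglik`, `grad`, and `approx_neg_hess` (the last returning a PD approximation $A(\theta)$ of $-\nabla^2 L(\theta)$); all three names are placeholders:

import numpy as np

def newton_ascent(loglik, grad, approx_neg_hess, theta, maxiter=100, tol=1e-8):
    # Newton-type ascent: direction from a PD approximation of the negative Hessian,
    # step length s chosen by step-halving until the objective increases.
    for _ in range(maxiter):
        g = grad(theta)
        if np.linalg.norm(g, np.inf) < tol:
            break
        direction = np.linalg.solve(approx_neg_hess(theta), g)   # Newton direction A^{-1} grad
        s, current = 1.0, loglik(theta)
        while loglik(theta + s * direction) <= current and s > 1e-10:
            s /= 2.0                                             # step-halving
        theta = theta + s * direction
    return theta

The Newton direction is computed once per iteration; the line search only re-evaluates $L$.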
How to approximate $-\nabla^2 L(\theta^{(t)})$? This often requires problem-specific analysis.
Taking $A = I$ leads to steepest ascent, also known as gradient ascent.

\begin{center} \includegraphics[width=0.8\textwidth]{gd.jpg} \end{center}
Fisher's scoring method¶
Replace $-\nabla^2 L(\theta)$ by the expected (Fisher) information matrix $$ \mathbf{F}(\theta) = \mathbf{E}[-\nabla^2 L(\theta)] = \mathbf{E}[\nabla L(\theta) \nabla L(\theta)^T]\succeq \mathbf{0}_{p\times p}, $$ where the second equality holds under regularity conditions permitting the exchange of expectation and differentiation; the outer-product form shows that $\mathbf{F}(\theta)$ is PSD.
Fisher's scoring algorithm iterates according to $$ \theta^{(t+1)} = \theta^{(t)} + s[\mathbf{F}(\theta^{(t)})]^{-1} \nabla L(\theta^{(t)}). $$
Nonlinear least squares - Gauss-Newton method (NAS 14.4-14.6)¶
The problem Gauss faced in the 1800s: relocate Ceres by fitting 41 observations to a 6-parameter (nonlinear) orbit.
Nonlinear least squares (curve fitting): $$ \min f(\beta) = \frac{1}{2}\sum_{i=1}^{n}\left(y_i - \mu_i(x_i, \beta) \right)^2. $$
Score and information matrices can be expressed as:
$$ \begin{aligned} \nabla f(\beta) =& -\sum_{i=1}^{n} [\mathbf{y}_i - \boldsymbol{\mu}_i(\beta)] \nabla \boldsymbol{\mu}_i(\beta) \\ \nabla^2 f(\beta) =& \sum_{i=1}^{n} \nabla \boldsymbol{\mu}_i(\beta) \nabla \boldsymbol{\mu}_i(\beta)^T -\sum_{i=1}^{n} [\mathbf{y}_i - \boldsymbol{\mu}_i(\beta)] \nabla^2 \boldsymbol{\mu}_i(\beta)\\ \mathbf{F}(\beta) =& \sum_{i=1}^{n} \nabla \boldsymbol{\mu}_i(\beta) \nabla \boldsymbol{\mu}_i(\beta)^T = \mathbf{J}(\beta)^T\mathbf{J}(\beta), \end{aligned} $$ where $\mathbf{J}(\beta)^T = [\nabla \boldsymbol{\mu}_1(\beta),\ldots,\nabla \boldsymbol{\mu}_n(\beta)] \in \mathbb{R}^{p \times n}$.
Gauss-Newton method: $$ \beta^{(t+1)} = \beta^{(t)} - s[\mathbf{F}(\beta^{(t)})]^{-1} \nabla f(\beta^{(t)}) $$
The Levenberg-Marquardt method, or damped least squares (DLS), adds a ridge term to the approximated Hessian: $$ \beta^{(t+1)} = \beta^{(t)} - s[\mathbf{F}(\beta^{(t)}) + \tau \mathbf{I}_p]^{-1} \nabla f(\beta^{(t)}) $$
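A hedged sketch of Levenberg-Marquardt on a toy curve-fitting problem; the model $\mu(x, \beta) = \beta_1 e^{-\beta_2 x}$, the simulated data, and the fixed damping parameter $\tau$ are all illustrative choices (a full implementation would adapt $\tau$ between iterations):

import numpy as np

# Toy nonlinear least squares: fit mu(x, beta) = b1 * exp(-b2 * x)
rng = np.random.default_rng(1)
x = np.linspace(0, 4, 50)
beta_true = np.array([2.0, 1.3])
y = beta_true[0] * np.exp(-beta_true[1] * x) + 0.05 * rng.standard_normal(50)

def residuals(beta):                     # r_i = y_i - mu_i(beta)
    return y - beta[0] * np.exp(-beta[1] * x)

def jacobian(beta):                      # J[i, j] = d mu_i / d beta_j
    e = np.exp(-beta[1] * x)
    return np.column_stack([e, -beta[0] * x * e])

beta, tau = np.array([1.0, 1.0]), 1e-3   # starting point and fixed damping (for simplicity)
for _ in range(50):
    r, J = residuals(beta), jacobian(beta)
    g = J.T @ r                                   # -grad f(beta) = J'r
    A = J.T @ J + tau * np.eye(2)                 # F(beta) + tau*I (damped Gauss-Newton)
    delta = np.linalg.solve(A, g)
    beta = beta + delta
    if np.linalg.norm(delta) < 1e-8:
        break
print(beta)    # should be close to beta_true, up to noise

Setting $\tau = 0$ recovers plain Gauss-Newton; a larger $\tau$ pushes the step toward a (scaled) gradient step.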
Generalized linear models (GLMs)¶
A concrete example: logistic regression.
The goal is to predict whether a credit card transaction is fraud ($y_i = 1$) or not ($y_i = 0$). Predictors ($x_i$) include: time of transaction, last location, merchant, ...
Model $y_i \sim \text{Bernoulli}(p_i)$, with density $$ f(y_i|p_i) = p_i^{y_i} (1-p_i)^{1 - y_i} $$ and \begin{gather*} \mathbf{E}(y_i) = p_i = \frac{e^{\eta_i}}{1 + e^{\eta_i}} \\ \eta_i = x_i^T\beta = \ln \left( \frac{p_i}{1 - p_i} \right) \end{gather*}
Given data $(y_i,x_i), i=1,\ldots,n$, $$ \begin{aligned} L_n(\beta) =& \sum_{i=1}^n \left[ y_i \, x_i^T\beta - \ln\left(1 + e^{x_i^T\beta}\right) \right] \\ \nabla L_n(\beta) =& \sum_{i=1}^n (y_i - p_i)\, x_i \\ -\nabla^2 L_n(\beta) =& \sum_{i=1}^n p_i(1 - p_i)\, x_i x_i^T \\ \mathbf{F}_n(\beta) =& \mathbf{E}[-\nabla^2 L_n(\beta)] = \sum_{i=1}^n p_i(1 - p_i)\, x_i x_i^T \end{aligned} $$ where $p_i = e^{x_i^T\beta}/(1 + e^{x_i^T\beta})$.
Newton's method is the same as Fisher's scoring: $$ \beta^{(t+1)} = \beta^{(t)} + s\left[\mathbf{X}^T \mathbf{W}^{(t)} \mathbf{X}\right]^{-1} \mathbf{X}^T \left(\mathbf{y} - \mathbf{p}^{(t)}\right), \qquad \mathbf{W}^{(t)} = \text{diag}\left(p_i^{(t)}(1 - p_i^{(t)})\right), $$ which for $s = 1$ equals the weighted least squares solution $$ \beta^{(t+1)} = \left[\mathbf{X}^T \mathbf{W}^{(t)} \mathbf{X}\right]^{-1} \mathbf{X}^T \mathbf{W}^{(t)} \mathbf{z}^{(t)}, $$ and the working responses are $$ z_i^{(t)} = x_i^T\beta^{(t)} + \frac{y_i - p_i^{(t)}}{p_i^{(t)}\left(1 - p_i^{(t)}\right)}. $$
One Newton iteration is therefore equivalent to solving a weighted least squares problem, hence the name IRWLS (iteratively re-weighted least squares); a numerical check of this equivalence follows below.
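A minimal numerical check of the equivalence (the simulated data and seed are arbitrary): with weights $w_i = p_i(1-p_i)$ and working response $z$, one Fisher-scoring step with $s = 1$ matches the weighted least squares solution.

import numpy as np
from scipy.special import expit

rng = np.random.default_rng(207)
n, p = 200, 3
X = rng.standard_normal((n, p))
y = rng.binomial(1, expit(X @ np.array([1.0, -1.0, 0.5])))

beta = np.zeros(p)                         # current iterate beta^(t)
eta = X @ beta
mu = expit(eta)
w = mu * (1 - mu)                          # IRWLS weights
z = eta + (y - mu) / w                     # working response

# Fisher scoring / Newton step with s = 1
newton_step = beta + np.linalg.solve(X.T @ (w[:, None] * X), X.T @ (y - mu))
# Weighted least squares: minimize sum_i w_i (z_i - x_i'b)^2
wls = np.linalg.solve(X.T @ (w[:, None] * X), X.T @ (w * z))
print(np.allclose(newton_step, wls))       # should print True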
GLM¶
The exponential family with density $$ p(y|\theta,\phi) = \exp\left\{ \frac{y\theta - b(\theta)}{a(\phi)} + c(y,\phi) \right\} $$
The score, Hessian, and information matrices are

\begin{center} \includegraphics[width=1.2\textwidth]{glm_eqs.jpg} \end{center}
For a canonical link ($\theta = \eta$), Fisher's scoring coincides with Newton's method, and maximizing the log-likelihood is a convex problem.
For non-canonical links, Fisher's scoring $\ne$ Newton's method, and the problem is in general non-convex.
import numpy as np
from scipy.special import expit
# Generate random data
np.random.seed(52)
n = 100
p = 3
X = np.random.randn(n, p)
true_beta = np.array([1.5, -0.5, 2.0])
eta = np.dot(X, true_beta)
probabilities = expit(eta)
y = np.random.binomial(1, probabilities)
# Initialize parameters
beta = np.zeros(p)
max_iter = 100
tolerance = 1e-6
# Fisher scoring algorithm
for _ in range(max_iter):
    eta = np.dot(X, beta)
    mu = expit(eta)
    W = np.diag(mu * (1 - mu))
    # Working response (the IRWLS view; not needed for the update below)
    z = eta + (y - mu) / (mu * (1 - mu))
    gradient = np.dot(X.T, y - mu)
    # Expected information X'WX (equals the observed information for the logit link)
    info = np.dot(X.T, np.dot(W, X))
    # Update beta using the Fisher scoring update
    beta_new = beta + np.linalg.solve(info, gradient)
    # Check convergence
    if np.linalg.norm(beta_new - beta, ord=np.inf) < tolerance:
        beta = beta_new
        break
    beta = beta_new
print("Estimated beta:", beta)
Python Implementations of GLM¶
import statsmodels.api as sm
import numpy as np
# Generate some random data
np.random.seed(0)
n = 100
X = np.random.randn(n, 2)
y = np.random.binomial(1, 0.5, n)
# Add constant column to X
X = sm.add_constant(X)
# Fit a logistic regression model using GLM
model = sm.GLM(y, X, family=sm.families.Binomial())
result = model.fit()
# Print the summary of the model
print(result.summary())
                 Generalized Linear Model Regression Results
==============================================================================
Dep. Variable:                      y   No. Observations:                  100
Model:                            GLM   Df Residuals:                       97
Model Family:                Binomial   Df Model:                            2
Link Function:                  Logit   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                -67.688
Date:                Thu, 18 May 2023   Deviance:                       135.38
Time:                        09:32:10   Pearson chi2:                     100.
No. Iterations:                     4   Pseudo R-squ. (CS):            0.02227
Covariance Type:            nonrobust
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.1623      0.205     -0.793      0.428      -0.563       0.239
x1             0.0169      0.200      0.085      0.933      -0.374       0.408
x2            -0.2999      0.203     -1.476      0.140      -0.698       0.098
==============================================================================
import glmnet_python
from glmnet import glmnet
## https://glmnet-python.readthedocs.io/en/latest/glmnet_vignette.html#
Quasi-Newton Methods¶
Quasi-Newton is one of the most successful "black-box" NLP (nonlinear programming) optimizers in use.
There are several quasi-Newton methods implemented in various Python libraries. Here are a few commonly used ones:
- SciPy: the `scipy.optimize.minimize` function provides quasi-Newton methods such as BFGS (Broyden-Fletcher-Goldfarb-Shanno) and L-BFGS-B (limited-memory BFGS with bound constraints).
- scikit-learn: the `sklearn.linear_model.LogisticRegression` class uses the limited-memory BFGS (L-BFGS) solver, a quasi-Newton method, for optimization.
- statsmodels: the `statsmodels.api.GLM` class supports quasi-Newton methods like BFGS and Newton conjugate gradient (NCG) for generalized linear models.
- PyTorch: the `torch.optim.LBFGS` optimizer implements the L-BFGS (limited-memory BFGS) algorithm, a quasi-Newton method, for optimization in PyTorch.
- TensorFlow: the `tfp.optimizer` module; its `tfp.optimizer.bfgs_minimize` function can be used to perform optimization using the BFGS algorithm.
Recall the Newton-type update $$ \theta^{(t+1)} = \theta^{(t)} + s[A^{(t)}]^{-1} \nabla L(\theta^{(t)}), $$ where $A^{(t)}$ is a PD approximation of $-\nabla^2 L(\theta^{(t)})$.
Pros: fast convergence
Cons: computing and storing the Hessian at each iteration (usually $O(np^2)$ cost in statistical problems), solving a linear system ($O(p^3)$ cost in general), and human effort (deriving and implementing the gradient and Hessian, finding a PD approximation, ...).
Any PD $A$ gives an ascent algorithm (with a suitable step length); the choice of $A$ trades off convergence rate against cost per iteration.
How many iterations does Newton's method with the exact Hessian need for a strictly convex quadratic $f$? Just one, as the demonstration below shows.
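A small numerical demonstration (illustrative matrices only): for a strictly convex quadratic $f(x) = \frac{1}{2}x^T Q x - b^T x$ with PD $Q$, a single Newton step from any starting point lands exactly at the minimizer $x^* = Q^{-1}b$.

import numpy as np

rng = np.random.default_rng(3)
M = rng.standard_normal((4, 4))
Q = M @ M.T + 4 * np.eye(4)                # PD Hessian of the quadratic
b = rng.standard_normal(4)

x0 = rng.standard_normal(4)                # arbitrary starting point
x1 = x0 - np.linalg.solve(Q, Q @ x0 - b)   # one Newton step: x0 - Q^{-1} grad f(x0)
print(np.allclose(x1, np.linalg.solve(Q, b)))   # should print True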
Quasi-Newton¶
- Update the approximation matrix $A$ according to the secant condition:
$$ \nabla f(\mathbf{x}^{(t)}) - \nabla f(\mathbf{x}^{(t-1)}) \approx [\nabla^2 f(\mathbf{x}^{(t)})](\mathbf{x}^{(t)} - \mathbf{x}^{(t-1)}). $$
Instead of computing $A$ from scratch at each iteration, we update an approximation $A$ to $\nabla^2 f(\mathbf{x}^{(t)})$ that satisfies
- positive definiteness,
- the secant condition,
- closeness to the previous approximation.
Quasi-Newton methods converge superlinearly, slower than the quadratic convergence of Newton's method, but each iteration costs only $O(p^2)$.
Davidon-Fletcher-Powell (DFP) rank-2 update¶
- Solve the following minimization problem:
$$ \begin{aligned} \min_{\mathbf{A}} & \quad \| \mathbf{A} - \mathbf{A}^{(t)} \|_F \\ \text{subject to} & \quad \mathbf{A} = \mathbf{A}^T \\ & \quad \mathbf{A}(\mathbf{x}^{(t)} - \mathbf{x}^{(t-1)}) = \nabla f(\mathbf{x}^{(t)}) - \nabla f(\mathbf{x}^{(t-1)}) \end{aligned} $$ to obtain the next approximation $\mathbf{A}^{(t+1)}$.
- The solution is a low rank (rank 1 or rank 2) update of $\mathbf{A}^{(t)}$.
- The inverse is low rank as well thanks to the Sherman-Morrison-Woodbury formula.
- $O(p^2)$ operations. Need to store a $p\times p$ dense matrix.
- How to make sure $\mathbf{A}^{(t+1)}$ is positive definite?
Broyden-Fletcher-Goldfarb-Shanno (BFGS) rank 2 update¶

\begin{center} \includegraphics[width=0.9\textwidth]{bfgs.png} \end{center}
- The BFGS update is considered by many the most effective among all quasi-Newton updates. BFGS imposes secant condition on the inverse of Hessian $H = A^{-1}$.
$$ \begin{aligned} \min_{\mathbf{H}} & \quad \| \mathbf{H} - \mathbf{H}^{(t)} \|_F \\ \text{subject to} & \quad \mathbf{H} = \mathbf{H}^T \\ & \quad \mathbf{H} \left[ \nabla f(\mathbf{x}^{(t)}) - \nabla f(\mathbf{x}^{(t-1)}) \right] = \mathbf{x}^{(t)} - \mathbf{x}^{(t-1)}. \end{aligned} $$
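The resulting update of $\mathbf{H}$ has a well-known closed form; the sketch below simply applies it. With $\mathbf{s} = \mathbf{x}^{(t)} - \mathbf{x}^{(t-1)}$ and $\mathbf{y} = \nabla f(\mathbf{x}^{(t)}) - \nabla f(\mathbf{x}^{(t-1)})$, positive curvature $\mathbf{y}^T\mathbf{s} > 0$ (guaranteed, e.g., by a Wolfe line search) keeps the updated $\mathbf{H}$ positive definite.

import numpy as np

def bfgs_update_H(H, s, y):
    # Standard BFGS update of the inverse-Hessian approximation H:
    #   H_new = (I - rho*s*y') H (I - rho*y*s') + rho*s*s',  rho = 1/(y's).
    # Requires the curvature condition y's > 0.
    rho = 1.0 / (y @ s)
    I = np.eye(len(s))
    V = I - rho * np.outer(s, y)
    return V @ H @ V.T + rho * np.outer(s, s)

One can verify the secant condition directly: the updated matrix satisfies $\mathbf{H}^{(t+1)}\mathbf{y} = \mathbf{s}$.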
Limited-memory BFGS (L-BFGS): only the $m$ most recent secant pairs are stored, so there is no need to store the $p\times p$ approximate inverse Hessian. Particularly useful for large-scale optimization; a sketch of the standard two-loop recursion follows below.
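A compact sketch of the standard two-loop recursion that L-BFGS uses to compute $\mathbf{H}\nabla f$ from the stored secant pairs without ever forming the $p \times p$ matrix (variable names are mine):

import numpy as np

def lbfgs_direction(grad, s_list, y_list):
    # Two-loop recursion: returns an approximation to H @ grad using only the
    # stored secant pairs (s_i, y_i), ordered oldest to most recent.
    q = grad.copy()
    rhos = [1.0 / (y @ s) for s, y in zip(s_list, y_list)]
    alphas = []
    for s, y, rho in reversed(list(zip(s_list, y_list, rhos))):
        a = rho * (s @ q)
        alphas.append(a)
        q -= a * y
    # Initial scaling H0 = gamma * I from the most recent pair
    gamma = (s_list[-1] @ y_list[-1]) / (y_list[-1] @ y_list[-1])
    r = gamma * q
    for (s, y, rho), a in zip(zip(s_list, y_list, rhos), reversed(alphas)):
        b = rho * (y @ r)
        r += s * (a - b)
    return r      # search direction: +r when maximizing L, -r when minimizing f

Storage is $O(mp)$ and each call costs $O(mp)$ operations.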
How to choose the initial approximation $A^{(0)}$? The identity, the Hessian (if PD), or the Fisher information matrix at the starting point.
import numpy as np
from scipy.optimize import minimize
# Define the Rosenbrock function
def rosenbrock(x):
    return np.sum(100 * (x[1:] - x[:-1]**2)**2 + (1 - x[:-1])**2)

# Define the gradient of the Rosenbrock function
def rosenbrock_gradient(x):
    n = len(x)
    gradient = np.zeros_like(x)
    gradient[0] = -400 * x[0] * (x[1] - x[0]**2) - 2 * (1 - x[0])
    gradient[1:-1] = 200 * (x[1:-1] - x[:-2]**2) - 400 * x[1:-1] * (x[2:] - x[1:-1]**2) - 2 * (1 - x[1:-1])
    gradient[-1] = 200 * (x[-1] - x[-2]**2)
    return gradient

# Define the Hessian of the Rosenbrock function
def rosenbrock_hessian(x):
    n = len(x)
    hessian = np.zeros((n, n))
    hessian[0, 0] = 1200 * x[0]**2 - 400 * x[1] + 2
    hessian[0, 1] = -400 * x[0]
    hessian[1, 0] = -400 * x[0]
    for i in range(1, n - 1):
        hessian[i, i] = 202 + 1200 * x[i]**2 - 400 * x[i+1]
        hessian[i, i+1] = -400 * x[i]
        hessian[i+1, i] = -400 * x[i]
    hessian[-1, -2] = -400 * x[-2]
    hessian[-2, -1] = -400 * x[-2]
    hessian[-1, -1] = 200
    return hessian
import warnings
# Suppress warnings from the examples below
warnings.filterwarnings('ignore')
import numpy as np
import matplotlib.pyplot as plt
# Define a 2-D version of the Rosenbrock function for plotting
# (named rosenbrock_2d so it does not shadow the n-dimensional rosenbrock above)
def rosenbrock_2d(x, y):
    return (1 - x)**2 + 100 * (y - x**2)**2

# Create a grid of x, y values
x = np.linspace(-2, 2, 100)
y = np.linspace(-1, 3, 100)
X, Y = np.meshgrid(x, y)

# Compute the Rosenbrock function values for each point in the grid
Z = rosenbrock_2d(X, Y)

# Plot the Rosenbrock function
fig = plt.figure()
ax = fig.add_subplot(projection='3d')
ax.plot_surface(X, Y, Z, cmap='viridis')
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_zlabel('Rosenbrock(x, y)')
ax.set_title('Rosenbrock Function')
plt.show()
# Initial guess
x0 = np.array([1.0, 2.0, 3.0])
# Minimize using BFGS (quasi-Newton; only the gradient is needed)
bfgs_result = minimize(rosenbrock, x0, method='BFGS', jac=rosenbrock_gradient)
# Minimize using nonlinear conjugate gradient (CG)
cg_result = minimize(rosenbrock, x0, method='CG', jac=rosenbrock_gradient)
# Print the convergence information
print("BFGS:")
print(bfgs_result)
print("\nCG:")
print(cg_result)
BFGS:
  message: Optimization terminated successfully.
  success: True
   status: 0
      fun: 8.179817545802151e-14
        x: [ 1.000e+00  1.000e+00  1.000e+00]
      nit: 25
      jac: [ 2.919e-06 -6.660e-06  2.322e-06]
 hess_inv: [[ 9.273e-02  1.830e-01  3.668e-01]
            [ 1.830e-01  3.659e-01  7.336e-01]
            [ 3.668e-01  7.336e-01  1.475e+00]]
     nfev: 31
     njev: 31

CG:
  message: Optimization terminated successfully.
  success: True
   status: 0
      fun: 4.942476963882118e-12
        x: [ 1.000e+00  1.000e+00  1.000e+00]
      nit: 45
      jac: [ 3.448e-07  1.027e-06  1.883e-06]
     nfev: 98
     njev: 98
## jupyter nbconvert --TagRemovePreprocessor.remove_input_tags='{"hide_code"}' --to pdf Newton.ipynb