STAT 207: Advanced Optimization Topics¶
Linear Programming¶
Applications: production planning, transportation, assignment, blending, portfolio constraints, staffing, network flows, resource allocation, etc.
A general linear program takes the form:
$$ \begin{aligned} \text{minimize} & \quad \mathbf{c}^\top \mathbf{x} \\ \text{subject to} & \quad \mathbf{A}\mathbf{x} = \mathbf{b} \\ & \quad \mathbf{G}\mathbf{x} \preceq \mathbf{h}. \end{aligned} $$
A linear program is a convex optimization problem. Why? The objective is linear (hence convex), and the feasible set is a polyhedron, an intersection of halfspaces and hyperplanes, which is convex.
\begin{center} \includegraphics[width=0.7\textwidth]{lp.jpg} \end{center}
- The standard form of a linear program (LP) is:
$$ \begin{aligned} \text{minimize} & \quad \mathbf{c}^\top \mathbf{x} \\ \text{subject to} & \quad \mathbf{A}\mathbf{x} = \mathbf{b} \\ & \quad \mathbf{x} \succeq \mathbf{0} \end{aligned} $$
To transform a general linear program into the standard form, we introduce slack variables $\mathbf{s} \succeq \mathbf{0}$ such that $\mathbf{G}\mathbf{x} + \mathbf{s} = \mathbf{h}$. Then we write $\mathbf{x} = \mathbf{x}^+ - \mathbf{x}^-$, where $\mathbf{x}^+ \succeq \mathbf{0}$ and $\mathbf{x}^- \succeq \mathbf{0}$. This yields the problem:
$$ \begin{aligned} \text{minimize} & \quad \mathbf{c}^\top (\mathbf{x}^+ - \mathbf{x}^-) \\ \text{subject to} & \quad \mathbf{A}(\mathbf{x}^+ - \mathbf{x}^-) = \mathbf{b} \\ & \quad \mathbf{G}(\mathbf{x}^+ - \mathbf{x}^-) + \mathbf{s} = \mathbf{h} \\ & \quad \mathbf{x}^+ \succeq \mathbf{0}, \quad \mathbf{x}^- \succeq \mathbf{0}, \quad \mathbf{s} \succeq \mathbf{0} \end{aligned} $$
The slack variables are often used to transform complicated inequality constraints into simpler non-negativity constraints.
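As a quick numerical sanity check (a sketch with made-up data and my own variable names), we can solve a small general-form LP directly and then in its standard form, and confirm the optima agree:

```python
import numpy as np
from scipy.optimize import linprog

# General-form LP: minimize c^T x  s.t.  A x = b,  G x <= h
c = np.array([1.0, 2.0])
A = np.array([[1.0, 1.0]]); b = np.array([1.0])
G = np.array([[1.0, -1.0], [-2.0, 1.0]]); h = np.array([2.0, 3.0])

# Solve the general form directly (x is free, so bounds must be opened up)
res1 = linprog(c, A_ub=G, b_ub=h, A_eq=A, b_eq=b, bounds=[(None, None)] * 2)

# Standard form: variables (x+, x-, s) >= 0 with equality constraints only
n, m = 2, 2
c_std = np.concatenate([c, -c, np.zeros(m)])
A_std = np.block([[A, -A, np.zeros((1, m))],
                  [G, -G, np.eye(m)]])
b_std = np.concatenate([b, h])
res2 = linprog(c_std, A_eq=A_std, b_eq=b_std, bounds=[(0, None)] * (2 * n + m))

print(res1.fun, res2.fun)  # both equal 0.5
```

Note that the optimal $\mathbf{x} = (1.5, -0.5)$ has a negative component, so the split $\mathbf{x} = \mathbf{x}^+ - \mathbf{x}^-$ is genuinely needed here.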
- The inequality form of a linear program (LP) is:
$$ \begin{aligned} \text{minimize} & \quad \mathbf{c}^\top \mathbf{x} \\ \text{subject to} & \quad \mathbf{G}\mathbf{x} \preceq \mathbf{h} \end{aligned} $$
- Different solvers expect different forms:
- the simplex method works with the standard (canonical) form;
- CVXOPT (Python) and SciPy's linprog accept equality, inequality, and bound constraints directly:
\begin{verbatim}
scipy.optimize.linprog(c, A_ub=None, b_ub=None, A_eq=None, b_eq=None,
                       bounds=None, method='highs', callback=None,
                       options=None, x0=None, integrality=None)
\end{verbatim}
Examples¶
- A piecewise-linear minimization problem can be transformed to an LP. The original problem:
$$ \text{minimize } \max_{i=1,\ldots,m} (\mathbf{a}_i^T\mathbf{x} + b_i) $$
can be transformed to the following LP:
$$ \begin{aligned} \text{minimize }& t\\ \text{subject to }& \mathbf{a}_i^T\mathbf{x} + b_i \leq t, \quad i=1,\ldots,m, \end{aligned} $$
in $\mathbf{x}$ and $t$.
Similarly, the problems
$$ \text{minimize } \max_{i=1,\ldots,m} |\mathbf{a}_i^T\mathbf{x} + b_i| $$
and
$$ \text{minimize } \max_{i=1,\ldots,m} (\mathbf{a}_i^T\mathbf{x} + b_i)^+ $$
can also be transformed to LPs, since $|u| = \max(u, -u)$ and $u^+ = \max(u, 0)$ are again maxima of affine functions.
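A minimal sketch of the epigraph trick with scipy.optimize.linprog, on a toy instance of my own: minimize $\max(x_1,\ 1-x_1,\ x_2,\ 1-x_2)$, whose optimum is $0.5$ at $x = (0.5, 0.5)$.

```python
import numpy as np
from scipy.optimize import linprog

# minimize max_i (a_i^T x + b_i) over x in R^2
A = np.array([[1.0, 0.0], [-1.0, 0.0], [0.0, 1.0], [0.0, -1.0]])
b = np.array([0.0, 1.0, 0.0, 1.0])
m, n = A.shape

# Epigraph variable t: minimize t subject to a_i^T x - t <= -b_i
c = np.concatenate([np.zeros(n), [1.0]])
A_ub = np.hstack([A, -np.ones((m, 1))])
res = linprog(c, A_ub=A_ub, b_ub=-b, bounds=[(None, None)] * (n + 1))

x_opt, t_opt = res.x[:n], res.x[n]
print(t_opt, x_opt)  # 0.5 at x = (0.5, 0.5)
```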
- Any convex optimization problem, defined as:
$$ \begin{aligned} \text{minimize }& f_0(\mathbf{x}) \\ \text{subject to }& f_i(\mathbf{x}) \leq 0, \quad i=1,\ldots,m, \\ & \mathbf{a}_i^T\mathbf{x} = b_i, \quad i=1,\ldots,p, \end{aligned} $$
where $f_0,\ldots,f_m$ are convex functions, can be transformed to the epigraph form:
$$ \begin{aligned} \text{minimize }& t \\ \text{subject to }& f_0(\mathbf{x}) - t \leq 0, \\ & f_i(\mathbf{x}) \leq 0, \quad i=1,\ldots,m, \\ & \mathbf{a}_i^T\mathbf{x} = b_i, \quad i=1,\ldots,p, \end{aligned} $$
in variables $\mathbf{x}$ and $t$. The epigraph form always has a linear objective, which is why people often say that a linear objective is universal for convex optimization.
- The linear fractional programming problem, defined as:
$$ \begin{aligned} \text{minimize }& \frac{\mathbf{c}^T\mathbf{x} + d}{\mathbf{e}^T\mathbf{x} + f} \\ \text{subject to }& \mathbf{A}\mathbf{x} = \mathbf{b}, \\ & \mathbf{G}\mathbf{x} \preceq \mathbf{h}\\ & \mathbf{e}^T\mathbf{x} + f > 0, \end{aligned} $$
can be transformed to an LP (linear programming) problem:
$$ \begin{aligned} \text{minimize }& \mathbf{c}^T\mathbf{y} + dz \\ \text{subject to }& \mathbf{G}\mathbf{y} - z\mathbf{h} \preceq \mathbf{0},\\ & \mathbf{A}\mathbf{y} - z\mathbf{b} = \mathbf{0}, \\ & \mathbf{e}^T\mathbf{y} + f z = 1, \\ & z \geq 0, \end{aligned} $$
in variables $\mathbf{y}$ and $z$, via the transformation of variables:
$$ \mathbf{y} = \frac{\mathbf{x}}{\mathbf{e}^T\mathbf{x} + f}, \quad z = \frac{1}{\mathbf{e}^T\mathbf{x} + f}. $$
Refer to Section 4.3.2 of Boyd and Vandenberghe (2004) for a proof.
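A sketch of the transformation on a toy problem of my own: minimize $(x_1 + 2)/(x_2 + 1)$ over the unit box $[0,1]^2$, whose optimum is $1$ at $x = (0, 1)$.

```python
import numpy as np
from scipy.optimize import linprog

# Linear-fractional data: minimize (c^T x + d) / (e^T x + f) over G x <= h
c = np.array([1.0, 0.0]); d = 2.0
e = np.array([0.0, 1.0]); f = 1.0
G = np.array([[1.0, 0.0], [-1.0, 0.0], [0.0, 1.0], [0.0, -1.0]])
h = np.array([1.0, 0.0, 1.0, 0.0])

# Equivalent LP in (y, z): minimize c^T y + d z
c_lp = np.concatenate([c, [d]])
A_ub = np.hstack([G, -h[:, None]])             # G y - z h <= 0
b_ub = np.zeros(len(h))
A_eq = np.concatenate([e, [f]])[None, :]       # e^T y + f z = 1
b_eq = np.array([1.0])
bounds = [(None, None), (None, None), (0, None)]  # z >= 0
res = linprog(c_lp, A_ub=A_ub, b_ub=b_ub, A_eq=A_eq, b_eq=b_eq, bounds=bounds)

y, z = res.x[:2], res.x[2]
x = y / z                                      # recover the original variable
print(x, (c @ x + d) / (e @ x + f))            # [0. 1.] and ratio 1.0
```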
Basis Pursuit ($L_1$-Minimization)¶
Implementations to solve the Basis Pursuit problem:
$$\min \|x\|_1 \quad \text{subject to} \quad Ax = y$$
import numpy as np

# Data Generation
np.random.seed(42)
m, n = 60, 100 # m < n: Underdetermined system
A = np.random.randn(m, n)
x0 = np.zeros(n)
# Create a sparse signal with 10 active components
active_indices = np.random.choice(n, 10, replace=False)
x0[active_indices] = np.random.uniform(1, 5, 10)
y = A @ x0
from scipy.optimize import linprog
## Method 1: split x = x+ - x- and solve the equivalent LP
vF = np.ones(2 * n)
mAeq = np.hstack((A, -A))
vBeq = y
vLowerBound = np.zeros(2 * n)
vUpperBound = np.inf * np.ones(2 * n)
res = linprog(vF, A_eq=mAeq, b_eq=vBeq, bounds=list(zip(vLowerBound, vUpperBound)))
vX = res.x[:n] - res.x[n:]
np.allclose(x0, vX)
True
## Method 2: solve directly with CVXPY
import cvxpy as cp
import numpy as np
x = cp.Variable(n)
# Create the optimization problem
objective = cp.Minimize(cp.norm(x, 1))
constraints = [A @ x == y]
problem = cp.Problem(objective, constraints)
# Solve the problem
problem.solve()
# Retrieve the solution
x_sol = x.value
print("obj val=", problem.value)
print(np.allclose(x0, x_sol))
max_diff = np.max(np.abs(x0 - x_sol))
print(f"Max Absolute Error: {max_diff}")
obj val= 29.221498107441164
False
Max Absolute Error: 1.769688573460826e-08
Quantile regression¶
Linear regression models the mean of the response.
However, in certain cases, the error variance may not be constant, the distribution of the response variable may exhibit asymmetry, or we may be interested in capturing specific quantiles of the response variable.
In such situations, quantile regression provides a more suitable modeling approach.
\begin{center} \includegraphics[width=0.8\textwidth]{quantreg.png} \end{center}
- In a $\tau$-quantile regression, we minimize the loss function $$ f(\beta) = \sum_{i=1}^{n} \rho_{\tau}(y_i - x_i^T\beta), $$ where $\rho_{\tau}(z) = z(\tau - 1_{z < 0})$. Writing $y - X\beta = r^+ - r^-$, this is equivalent to the LP
$$ \begin{aligned} \text{minimize } & \tau 1^T r^+ + (1 - \tau) 1^T r^- \\ \text{subject to } & r^+ - r^- = y - X\beta \\ & r^+ \succeq 0, \quad r^- \succeq 0 \end{aligned} $$ in $r^+$, $r^-$, and $\beta$.
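The quantile-regression LP can be handed to scipy.optimize.linprog; a sketch on simulated data (sizes and names are my own choices), with the decision variables stacked as $(\beta, r^+, r^-)$:

```python
import numpy as np
from scipy.optimize import linprog

rng = np.random.default_rng(0)
n, p, tau = 200, 3, 0.5
X = np.column_stack([np.ones(n), rng.standard_normal((n, p - 1))])
beta_true = np.array([1.0, 2.0, -1.0])
y = X @ beta_true + rng.standard_normal(n)

# Objective: tau * 1^T r+ + (1 - tau) * 1^T r-; beta is free, r+/- >= 0
c = np.concatenate([np.zeros(p), tau * np.ones(n), (1 - tau) * np.ones(n)])
A_eq = np.hstack([X, np.eye(n), -np.eye(n)])   # X beta + r+ - r- = y
bounds = [(None, None)] * p + [(0, None)] * (2 * n)
res = linprog(c, A_eq=A_eq, b_eq=y, bounds=bounds)

beta_hat = res.x[:p]
print(beta_hat)  # close to beta_true; tau = 0.5 is median regression
```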
$\ell_1$ Regression¶
A popular method in robust statistics is least absolute deviation (LAD) regression, which minimizes the $\ell_1$ norm of the residual vector $\|\mathbf{y} - \mathbf{X}\boldsymbol{\beta}\|_1$. This is equivalent to the LP $$ \begin{aligned} \text{minimize} \quad & 1^T(\mathbf{r}^+ + \mathbf{r}^-) \\ \text{subject to} \quad & \mathbf{r}^+ - \mathbf{r}^- = \mathbf{y} - \mathbf{X}\boldsymbol{\beta} \\ & \mathbf{r}^+ \succeq 0, \quad \mathbf{r}^- \succeq 0 \end{aligned} $$ in $\mathbf{r}^+$, $\mathbf{r}^-$, and $\boldsymbol{\beta}$.
$\ell_1$ regression = LAD regression = median ($\tau = 0.5$ quantile) regression.
Dantzig selector¶
Candes and Tao (2007) propose a variable selection method called the Dantzig selector that solves: $$ \begin{aligned} \text{minimize } & \|X^T(y - X\beta)\|_\infty \\ \text{subject to } & \sum_{j=1}^p |\beta_j| \leq t, \end{aligned} $$ which can be transformed to an LP.
The method is named after George Dantzig, who invented the simplex method for efficiently solving LPs in the 1950s.
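One explicit LP reformulation (my own construction, on simulated data): introduce an epigraph scalar $s$ for the sup-norm and split $\beta = \beta^+ - \beta^-$ for the $\ell_1$ budget.

```python
import numpy as np
from scipy.optimize import linprog

rng = np.random.default_rng(0)
n, p = 50, 20
X = rng.standard_normal((n, p))
beta_true = np.zeros(p); beta_true[:3] = [3.0, -2.0, 1.5]
y = X @ beta_true + 0.1 * rng.standard_normal(n)

M, u = X.T @ X, X.T @ y
t = np.abs(beta_true).sum()  # l1 budget, set from the truth for illustration

# Variables (beta+, beta-, s), all >= 0; minimize s
c = np.concatenate([np.zeros(2 * p), [1.0]])
ones = np.ones((p, 1))
A_ub = np.vstack([
    np.hstack([-M, M, -ones]),                     #  u - M(b+ - b-) <= s 1
    np.hstack([M, -M, -ones]),                     # -(u - M(b+ - b-)) <= s 1
    np.concatenate([np.ones(2 * p), [0.0]])[None]  # 1^T (b+ + b-) <= t
])
b_ub = np.concatenate([-u, u, [t]])
res = linprog(c, A_ub=A_ub, b_ub=b_ub, bounds=[(0, None)] * (2 * p + 1))

beta_hat = res.x[:p] - res.x[p:2 * p]
print(res.fun)  # optimal sup-norm of X^T residual
```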
from IPython.display import Image, display
# Adjust the file path as necessary
file_path = "qp.jpg"
# Display the image with 80% width
display(Image(filename=file_path, width=600))
Quadratic Programming¶
- A quadratic program (QP) has a quadratic objective function and affine constraint functions:
$$ \begin{aligned} \text{minimize }& \frac{1}{2}\mathbf{x}^T\mathbf{P}\mathbf{x} + \mathbf{q}^T\mathbf{x}\\ \text{subject to }& \mathbf{G}\mathbf{x} \preceq \mathbf{h}\\ & \mathbf{A}\mathbf{x} = \mathbf{b}, \end{aligned} $$ where we require $\mathbf{P} \in \mathbb{S}^n_+$ (symmetric positive semidefinite) so that the problem is convex. Linear programming (LP) is the special case of QP with $\mathbf{P} = \mathbf{0}_{n \times n}$.
Examples¶
- Least squares with linear constraints. For example, nonnegative least squares (NNLS)
$$ \begin{aligned} \text{minimize }& \frac{1}{2}\|\mathbf{y} - \mathbf{X}\mathbf{\beta}\|_2^2 \\ \text{subject to }& \mathbf{\beta} \succeq \mathbf{0} \end{aligned} $$
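SciPy ships a dedicated active-set solver for this problem; a minimal sketch on simulated data (names are my own):

```python
import numpy as np
from scipy.optimize import nnls

rng = np.random.default_rng(0)
n, p = 100, 5
X = rng.standard_normal((n, p))
beta_true = np.array([2.0, 0.0, 1.0, 0.0, 3.0])  # nonnegative truth
y = X @ beta_true + 0.1 * rng.standard_normal(n)

# nnls solves min ||y - X beta||_2 subject to beta >= 0
beta_hat, rnorm = nnls(X, y)
print(beta_hat)  # all entries >= 0, close to beta_true
```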
Lasso (Tibshirani 1996) minimizes the least squares loss with the $\ell_1$ (lasso) penalty $$ \text{minimize }\frac{1}{2}\|\mathbf{y} - \beta_0\mathbf{1} - \mathbf{X}\mathbf{\beta}\|_2^2 + \lambda \|\mathbf{\beta}\|_1, $$ where $\lambda> 0$ is the tuning parameter.
Writing $\beta = \beta^+ - \beta^-$, the equivalent QP is $$ \begin{aligned} \text{minimize }& \quad \frac{1}{2}(\beta^+ - \beta^-)^T X^T (I - \frac{1}{n}11^T) X (\beta^+ - \beta^-) -\\ & y^T (I - \frac{1}{n}11^T) X (\beta^+ - \beta^-) + \lambda 1^T (\beta^+ + \beta^-) \\ \text{subject to }& \quad \beta^+ \succeq 0, \quad \beta^- \succeq 0 \end{aligned} $$
in $\beta^+, \beta^-$.
Lasso Problem¶
Greedy coordinate descent: updating one coordinate (or parameter) at a time by selecting the coordinate that provides the most significant reduction in the objective function.
Cyclic coordinate descent: updates the coordinates in a fixed cyclic order. It repeatedly cycles through the coordinates, updating each one in turn while keeping the others fixed.
Solve the Lasso $\ell_1$-penalized regression problem: $$ \text{minimize } f(\beta) = \frac{1}{2}\|y - X\beta\|_2^2 + \lambda\|\beta\|_1. $$
The partial derivative with respect to $\beta_j$ (for $\beta_j \neq 0$) is $$ \frac{\partial}{\partial \beta_j}f(\beta) = -X_j^\top (y - X\beta) + \lambda s_j, $$ where $s_j\in \{1,-1\}$ is the sign of $\beta_j$. At $\beta_j = 0$, the directional derivatives are $$ d_{e_j} f(\beta) = \lim_{t \downarrow 0} \frac{f(\beta + t e_j) - f(\beta)}{t} = -X_j^\top(y - X\beta) + \lambda, $$ $$ d_{-e_j} f(\beta) = \lim_{t \downarrow 0} \frac{f(\beta - t e_j) - f(\beta)}{t} = X_j^\top(y - X\beta) + \lambda. $$ Hence $\beta_j$ moves to the right if $X_j^\top(y - X\beta) > \lambda$, to the left if $X_j^\top(y - X\beta) < -\lambda$, and stays at zero otherwise.
import numpy as np
def soft_thresholding(rho, lam):
    """Standard soft-thresholding operator."""
    if rho < -lam:
        return rho + lam
    elif rho > lam:
        return rho - lam
    else:
        return 0
def lasso_coordinate_descent_optimized(X, y, lam, num_iters=1000, tol=1e-4):
    """
    Improved Lasso Coordinate Descent
    - Handles unnormalized features
    - Optimized residual updates O(MN)
    - Includes unpenalized intercept
    """
    m, n = X.shape
    # 1. Standardize/center (highly recommended for LASSO)
    # For this implementation, we assume X is centered or we track the intercept
    beta = np.zeros(n)
    intercept = np.mean(y)  # initial intercept
    # Precompute squared norms of columns to handle unnormalized X
    sq_norms = np.sum(X**2, axis=0)
    # Maintain the residual vector: r = y - (X @ beta + intercept)
    residual = y - (X @ beta + intercept)
    for iteration in range(num_iters):
        beta_old = beta.copy()
        # Update intercept (unpenalized)
        intercept_change = np.mean(residual)
        intercept += intercept_change
        residual -= intercept_change
        # Update coefficients
        for j in range(n):
            X_j = X[:, j]
            # rho_j = X_j^T (y - intercept - X @ beta + beta[j] * X_j);
            # since 'residual' is y - X @ beta - intercept, the partial
            # residual is residual + beta[j] * X_j
            rho = np.dot(X_j, residual) + beta[j] * sq_norms[j]
            # Apply soft thresholding
            beta[j] = soft_thresholding(rho, lam) / sq_norms[j]
            # Update the residual vector efficiently:
            # r_new = r_old + (beta_old_j - beta_new_j) * X_j
            diff = beta_old[j] - beta[j]
            if diff != 0:
                residual += diff * X_j
        # Check convergence: max coordinate change
        if np.max(np.abs(beta - beta_old)) < tol:
            break
    return intercept, beta
# --- Testing the implementation ---
# Generate sparse data
np.random.seed(42)
n_samples, n_features = 50, 100
X = np.random.randn(n_samples, n_features)
true_beta = np.zeros(n_features)
true_beta[[5, 10, 15, 20, 25]] = [5, -4, 3, -2, 1] # Only 5 non-zero weights
y = X @ true_beta + np.random.normal(2, 1, n_samples)
# 2. Run Optimized Lasso
lam = 15.0
intercept, beta_hat = lasso_coordinate_descent_optimized(X, y, lam)
print(f"Number of non-zero features: {np.sum(beta_hat != 0)}")
print(f"Active feature indices: {np.where(beta_hat != 0)[0]}")
print(f"Intercept: {intercept}")
Number of non-zero features: 8
Active feature indices: [ 5 10 12 15 18 20 25 84]
Intercept: 2.1962787556654955
import matplotlib.pyplot as plt
# --- Reusable Plotting Functions ---
def plot_nonzero_coefficients(beta_hat, beta_true=None, title="Coefficient Estimates"):
    """Plots a stem plot of the coefficients."""
    plt.figure(figsize=(10, 4))
    indices = np.arange(len(beta_hat))
    if beta_true is not None:
        plt.stem(indices, beta_true, linefmt='g-', markerfmt='go', basefmt='k-', label='True')
    plt.stem(indices, beta_hat, linefmt='r--', markerfmt='rx', basefmt='k-', label='Estimated')
    plt.title(title)
    plt.xlabel("Coefficient Index")
    plt.ylabel("Value")
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.show()
plot_nonzero_coefficients(beta_hat, true_beta, title=f"Beta Estimates (Lambda={lam:.4f})")
# Change of lambda
lam = 5.0
intercept, beta_hat = lasso_coordinate_descent_optimized(X, y, lam)
plot_nonzero_coefficients(beta_hat, true_beta, title=f"Beta Estimates (Lambda={lam:.4f})")
import numpy as np
import matplotlib.pyplot as plt
def soft_thresholding(rho, lam):
    """Standard soft-thresholding operator (vectorized)."""
    return np.sign(rho) * np.maximum(np.abs(rho) - lam, 0)

def lasso_greedy_coordinate_descent(X, y, lambda_, num_iters=1000, tol=1e-4):
    m, n = X.shape
    beta = np.zeros(n)
    sq_norms = np.sum(X**2, axis=0)
    # Maintain residual: r = y - X @ beta. Initially beta = 0, so r = y
    residual = y.copy()
    for iteration in range(num_iters):
        # Calculate potential updates for ALL coordinates:
        # rho_j = X_j^T (residual + beta_j * X_j)
        rhos = np.dot(X.T, residual) + beta * sq_norms
        # Calculate what the new beta would be for all j
        new_betas = soft_thresholding(rhos, lambda_) / sq_norms
        # Greedy selection: find the coordinate that changes the most
        changes = np.abs(new_betas - beta)
        max_index = np.argmax(changes)
        if changes[max_index] < tol:
            break
        # Update only the best coordinate
        diff = new_betas[max_index] - beta[max_index]
        beta[max_index] = new_betas[max_index]
        # Update residual: r_new = r_old - X_j * delta_beta_j
        residual -= X[:, max_index] * diff
    return beta
def plot_solution_paths(lambda_values, coefficients):
    """Plots the path of coefficients as lambda changes."""
    plt.figure(figsize=(10, 6))
    plt.semilogx(lambda_values, coefficients)
    plt.gca().invert_xaxis()  # usually shown from large lambda to small
    plt.xlabel('Lambda (log scale)')
    plt.ylabel('Coefficients')
    plt.title('Lasso Solution Paths')
    plt.grid(True, alpha=0.3)
    plt.show()
def plot_prediction_error(lambda_values, errors):
    """Plots the prediction error (MSE) vs lambda."""
    plt.figure(figsize=(10, 4))
    plt.semilogx(lambda_values, errors, color='blue', marker='o', markersize=3)
    plt.gca().invert_xaxis()
    # Highlight the best lambda
    best_idx = np.argmin(errors)
    plt.axvline(lambda_values[best_idx], color='red', linestyle='--',
                label=f'Min Error at $\\lambda$={lambda_values[best_idx]:.4f}')
    plt.xlabel('Lambda (log scale)')
    plt.ylabel('Mean Squared Error')
    plt.title('Prediction Error for Different Lambda Values')
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.show()
# 1. Setup Data
np.random.seed(42)
m, n = 100, 200 # Underdetermined system (m < n)
X = np.random.randn(m, n)
true_beta = np.zeros(n)
true_beta[[5, 10, 15, 20, 25]] = [5, -4, 3, -2, 1] # Only 5 non-zero weights
y = X @ true_beta + np.random.normal(2, 1, m)
# Test set for prediction error
X_test = np.random.randn(m, n)
y_test = X_test @ true_beta + np.random.normal(2, 1, m)
# 2. Run LASSO over a range of Lambdas
lambda_values = np.logspace(3, 0, 50)
all_coeffs = []
errors = []
for l in lambda_values:
    beta = lasso_greedy_coordinate_descent(X, y, l)
    all_coeffs.append(beta)
    # Calculate prediction error
    y_pred = X_test @ beta
    mse = np.mean((y_test - y_pred)**2)
    errors.append(mse)
all_coeffs = np.array(all_coeffs)
# 3. Visualize Results
# Plot coefficients for the "best" lambda found
best_lambda_idx = np.argmin(errors)
best_beta = all_coeffs[best_lambda_idx]
plot_nonzero_coefficients(best_beta, true_beta, title=f"Best Estimates (Lambda={lambda_values[best_lambda_idx]:.4f})")
plot_solution_paths(lambda_values, all_coeffs)
plot_prediction_error(lambda_values, errors)
# Adjust the file path as necessary
file_path = "ridge.jpg"
# Display the image with 80% width
display(Image(filename=file_path, width=600))
# Adjust the file path as necessary
file_path = "lasso.jpg"
# Display the image with 80% width
display(Image(filename=file_path, width=600))
- Elastic net (Zou and Hastie, 2005):
$$ \text{minimize }\frac{1}{2}\|\mathbf{y} - \beta_0\mathbf{1} - \mathbf{X}\mathbf{\beta}\|_2^2 + \lambda \left( \alpha \|\mathbf{\beta}\|_1 + (1-\alpha) \|\mathbf{\beta}\|_2^2 \right), $$ where $\lambda > 0$ is a tuning parameter and $\alpha \in [0,1]$ balances the lasso and ridge penalties. Splitting $\beta = \beta^+ - \beta^-$ again yields a QP.
- Image denoising by the isotropic total variation (TV) penalty $$ \frac{1}{2}\|y - x\|_F^2 + \lambda \sum_{i,j} \sqrt{(x_{i+1,j} - x_{i,j})^2 + (x_{i,j+1} - x_{i,j})^2}, $$ or the anisotropic TV penalty
$$ \frac{1}{2}\|y - x\|_F^2 + \lambda \sum_{i,j} \left(|x_{i+1,j} - x_{i,j}| + |x_{i,j+1} - x_{i,j}| \right). $$ The anisotropic version can be transformed to a QP; the isotropic version is a second-order cone program.
- The Huber loss $$ L_M(r) = \begin{cases} \frac{1}{2}r^2 & \text{for } |r| \leq M \\ M(|r| - \frac{1}{2}M) & \text{for } |r| > M \end{cases} $$
is commonly used in robust statistics. The robust regression problem $$ \text{minimize }\sum_{i=1}^{n} L_M(y_i - \beta_0 - x_i^T\beta) $$ can be transformed to a QP $$ \begin{aligned} \text{minimize }\quad & u^Tu + 2M1^Tv \\ \text{subject to }\quad & - u - v \preceq y - X\beta \preceq u + v\\ & 0 \preceq u \preceq M1, \quad v \succeq 0 \end{aligned} $$ in $u, v \in \mathbb{R}^n$ and $\beta \in \mathbb{R}^p$. Hint: write $|r_i| = (|r_i| \wedge M) + (|r_i| - M)_+ = u_i + v_i$.
- Support Vector Machines (SVM). In two-class classification problems, we are given training data $(\mathbf{x}_i, y_i), i=1,\ldots,n$, where $\mathbf{x}_i \in \mathbb{R}^p$ are feature vectors and $y_i \in \{-1,1\}$ are class labels. The SVM solves the optimization problem:
$$ \text{minimize } \sum_{i=1}^{n} \left[1 - y_i(\beta_0 + \sum_{j=1}^{p} x_{ij}\beta_j)\right]_+ + \lambda\|\beta\|_2^2, $$ where $\lambda \geq 0$ is a tuning parameter. This is a quadratic programming problem.
import numpy as np
import matplotlib.pyplot as plt
from skimage import io
# Load the combined image
image = io.imread('lena.jpg', as_gray=True)
# Get image dimensions
height, width = image.shape
# Split into left and right halves
midpoint = width // 2
lena_noisy = image[:, :midpoint]
lena_clean = image[:, midpoint:]
# Display both images
plt.figure(figsize=(10, 4))
plt.subplot(1, 2, 1)
plt.title("Lena Noisy")
plt.imshow(lena_noisy, cmap='gray')
plt.axis('off')
plt.subplot(1, 2, 2)
plt.title("Lena Clean")
plt.imshow(lena_clean, cmap='gray')
plt.axis('off')
plt.tight_layout()
plt.show()
import numpy as np
import matplotlib.pyplot as plt
from skimage import io, color
from skimage.restoration import denoise_tv_chambolle
# Apply total variation denoising (isotropic TV)
lambda_tv = 0.1 # regularization strength
img_denoised = denoise_tv_chambolle(lena_noisy, weight=lambda_tv)
# Plot
plt.figure(figsize=(12, 4))
plt.subplot(1, 3, 1)
plt.title("Original (Noisy)")
plt.imshow(lena_noisy, cmap='gray')
plt.axis('off')
plt.subplot(1, 3, 2)
plt.title("Denoised (TV)")
plt.imshow(img_denoised, cmap='gray')
plt.axis('off')
plt.subplot(1, 3, 3)
plt.title("Ground Truth")
plt.imshow(lena_clean, cmap='gray')
plt.axis('off')
plt.tight_layout()
plt.show()
import cvxpy as cp
Y = lena_noisy # input image
n, m = Y.shape
print(n,m)
X = cp.Variable((n, m))
tv_penalty = cp.tv(X)
# Denoising objective
lambda_tv = 0.1
objective = cp.Minimize(0.5 * cp.sum_squares(X - Y) + lambda_tv * tv_penalty)
# Solve
problem = cp.Problem(objective)
problem.solve()
# Result
img_denoised = X.value
351 343
# Plot
plt.figure(figsize=(12, 4))
plt.subplot(1, 3, 1)
plt.title("Original (Noisy)")
plt.imshow(lena_noisy, cmap='gray')
plt.axis('off')
plt.subplot(1, 3, 2)
plt.title("Denoised (TV)")
plt.imshow(img_denoised, cmap='gray')
plt.axis('off')
plt.subplot(1, 3, 3)
plt.title("Ground Truth")
plt.imshow(lena_clean, cmap='gray')
plt.axis('off')
plt.tight_layout()
plt.show()