ASSIGNMENT 3¶
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
Question 1¶
Write a function using MATLAB’s functions for numerical integration such as integral that will find P(X ≤ x) when the random variable is exponentially distributed with parameter λ. See help for information on how to use these functions.
from scipy.integrate import quad
from scipy.stats import expon
The exponential probability density function (PDF) is $f(t) = \lambda e^{-\lambda t}$ for $t \ge 0$.
We can compute $P(X \le x) = \int_0^x f(t) \, dt$ using the scipy.integrate.quad function.
def exponential_cdf(x, lam):
    # integrate the exponential PDF from 0 to x
    f = lambda t: lam * np.exp(-lam * t)
    result, _ = quad(f, 0, x)
    return result
# examples
lam = 2
x = 1.5
P = exponential_cdf(x, lam)
print(f"P(X ≤ {x}) for λ = {lam} is {P:.5f}")
lam = 1.5
x = 3.5
P = exponential_cdf(x, lam)
print(f"P(X ≤ {x}) for λ = {lam} is {P:.5f}")
P(X ≤ 1.5) for λ = 2 is 0.95021
P(X ≤ 3.5) for λ = 1.5 is 0.99475
# checking the results
lam = 2
x = 1.5
print(f"CDF for x:{x}, lambda:{lam} = {expon.cdf(x,scale=1/lam):.5f}")
lam = 1.5
x = 3.5
print(f"CDF for x:{x}, lambda:{lam} = {expon.cdf(x,scale=1/lam):.5f}")
CDF for x:1.5, lambda:2 = 0.95021
CDF for x:3.5, lambda:1.5 = 0.99475
Question 2¶
When a random variable is equally likely to be either positive or negative, then the Laplacian or the double exponential distribution can be used to model it.
The Laplacian probability density function for $\lambda > 0$ is given by
$f(x) = \frac{1}{2}\lambda e^{-\lambda |x|}, \quad -\infty < x < \infty.$
a. Derive the cumulative distribution function for the Laplacian.
b. Write a MATLAB function that will evaluate the Laplacian probability density function for given values in the domain.
c. Write a MATLAB function that will evaluate the Laplacian cumulative distribution function.
d. Plot the probability density function when $\lambda = 1$.
Derivation of CDF of the Laplacian¶
Given the Laplacian PDF: $f(x) = \frac{1}{2}\lambda e^{-\lambda |x|}, \quad -\infty < x < \infty$
We can find the cumulative distribution function $F(x) = P(X \le x)$ by integrating the PDF.
Case 1: $x < 0$
For $x < 0$, integrate from $-\infty$ to $x$. Since $|t| = -t$ when $t < 0$: $F(x) = \int_{-\infty}^{x} \frac{1}{2}\lambda e^{-\lambda|t|}dt = \int_{-\infty}^{x} \frac{1}{2}\lambda e^{\lambda t}dt$
Computing the integral:
$F(x) = \left[\frac{1}{2} e^{\lambda t}\right]_{t=-\infty}^{t=x} = \frac{1}{2} e^{\lambda x} - \frac{1}{2}\lim_{t \to -\infty} e^{\lambda t}$
Since $\lim_{t \to -\infty} e^{\lambda t} = 0$, we obtain: $F(x) = \frac{1}{2} e^{\lambda x}, \quad x < 0$
Case 2: $x \ge 0$
For $x \ge 0$, we split the integral at $0$: $F(x) = \int_{-\infty}^{x} \frac{1}{2}\lambda e^{-\lambda |t|}\,dt = \int_{-\infty}^{0} \frac{1}{2}\lambda e^{\lambda t}\,dt + \int_{0}^{x} \frac{1}{2}\lambda e^{-\lambda t}\,dt$
Calculating each part:
$\int_{-\infty}^{0} \frac{1}{2}\lambda e^{\lambda t}dt = \left[\frac{1}{2} e^{\lambda t}\right]_{-\infty}^{0} = \frac{1}{2}$
and
$\int_{0}^{x} \frac{1}{2}\lambda e^{-\lambda t}dt = \left[-\frac{1}{2} e^{-\lambda t}\right]_{0}^{x} = \frac{1}{2}(1 - e^{-\lambda x})$
Summing:
$F(x) = \frac{1}{2} + \frac{1}{2}(1 - e^{-\lambda x}) = 1 - \frac{1}{2} e^{-\lambda x}$
So: $F(x) = 1 - \frac{1}{2} e^{-\lambda x}, \quad x \ge 0$
Continuity check at $x = 0:$
From the left: $F(0^-) = \frac{1}{2} e^{0} = \frac{1}{2}$
From the right: $F(0^+) = 1 - \frac{1}{2} e^{0} = \frac{1}{2}$
Thus $F$ is continuous at $0$.
CDF of the Laplacian:
$F(x) = \begin{cases} \frac{1}{2} e^{\lambda x}, & x < 0, \\ 1 - \frac{1}{2} e^{-\lambda x}, & x \ge 0. \end{cases}$
def laplacian_pdf(x, lam):
    return 0.5 * lam * np.exp(-lam * np.abs(x))

def laplacian_cdf(x, lam):
    x = np.asarray(x)
    # piecewise CDF derived above
    F = np.where(x < 0, 0.5 * np.exp(lam * x), 1 - 0.5 * np.exp(-lam * x))
    return F
# plot for λ = 1
lam = 1
x = np.linspace(-5, 5, 400)
pdf_values = laplacian_pdf(x, lam)
cdf_values = laplacian_cdf(x, lam)
plt.figure(figsize=(6, 4))
plt.plot(x, pdf_values, label=r'$f(x) = \frac{1}{2}\lambda e^{-\lambda |x|}$', color='blue')
plt.title("Laplacian Probability Density Function (λ = 1)")
plt.xlabel("x")
plt.ylabel("f(x)")
plt.grid(True)
plt.legend()
plt.show()
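As an optional sanity check on the derivation, the piecewise CDF can be compared against scipy.stats.laplace, which parameterizes the distribution by a scale $b = 1/\lambda$ rather than a rate:

```python
import numpy as np
from scipy.stats import laplace

lam = 1.0
xs = np.array([-2.0, -0.5, 0.0, 0.5, 2.0])

# closed-form CDF derived above
F = np.where(xs < 0, 0.5 * np.exp(lam * xs), 1 - 0.5 * np.exp(-lam * xs))

# scipy's Laplace distribution uses scale b = 1/λ
F_scipy = laplace.cdf(xs, scale=1/lam)
print(np.max(np.abs(F - F_scipy)))  # essentially 0
```

The two agree to machine precision, which confirms the piecewise formula.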
Question 3¶
Suppose $X$ follows the exponential distribution with parameter $\lambda$. Show that for $s \ge 0$ and $t \ge 0$,
$$P(X > s + t \mid X > s) = P(X > t).$$
Solution:
Let $X$ be a random variable which follows exponential distribution with parameter $\lambda > 0$.
The conditional probability indicates that $P(A \mid B) = \frac{P(A \cap B)}{P(B)}$. Thus, using the definition of conditional probability, we can say that:
$P(X > s + t \mid X > s) = \frac{P(X > s + t \text{ }\cap \text{ } X > s)}{P(X > s)} = \frac{P(X > s + t)}{P(X > s)}.$
For an exponential random variable, the cumulative distribution function is $F(u) = 1 - e^{-\lambda u}$ for $u \ge 0$, which is $P(X \le u)$.
Then, $P(X > u) = 1 - P(X \le u) = 1 - F(u) = e^{-\lambda u}$ for $u \ge 0.$
Thus, $P(X > s + t \mid X > s) = \frac{e^{-\lambda (s + t)}}{e^{-\lambda s}} = e^{-\lambda t} = P(X > t).$
This proves that $\boxed{P(X > s + t \mid X > s) = P(X > t)}$ which is called the memoryless property of the exponential distribution for $s \ge 0$, $t \ge 0$.
from scipy.stats import expon
# P(X>s+t| X>s) = P(X>t)
def P_exponential(u):
    # as shown above, the survival function equals 1 - F(u)
    result = 1 - expon.cdf(u)
    return result
# example to show the memoryless property of the exp dist
s = 10
t = 20
r1 = P_exponential(s+t)
r2 = P_exponential(s)
r3 = P_exponential(t)
final_result = r1 / r2
print(f"P(X>s+t| X>s) = P(X>s+t)/P(X>s) = {r1/r2:.13f} and P(X>t) = {r3:.13f}")
print(f"The difference, (P(X>s+t)/P(X>s) - P(X>t)), is {(r1/r2)-r3:.13f} which is approximately 0.")
print(f"This demonstrates the above property numerically.")
P(X>s+t| X>s) = P(X>s+t)/P(X>s) = 0.0000000020615 and P(X>t) = 0.0000000020612
The difference, (P(X>s+t)/P(X>s) - P(X>t)), is 0.0000000000003 which is approximately 0.
This demonstrates the above property numerically.
Question 4¶
The time to failure for a widget follows a Weibull distribution, with $\nu = 0$, $\beta = \frac{1}{2}$ and $\alpha = 750$ hours.
a. What is the mean time to failure of the widget?
b. What percentage of the widgets will fail by 2500 hours of operation? That is, what is the probability that a widget will fail within 2500 hours?
from scipy.stats import weibull_min
import math
beta = 0.5
alpha = 750
nu = 0
# (a) mean time to failure
# Mean of Weibull = α * Γ(1 + 1/β)
mean_time_to_failure = alpha * math.gamma(1 + 1/beta)
print("Mean time to failure:", mean_time_to_failure, "hours")
# (b) probability of failure within 2500 hours
t = 2500
prob_failure_2500 = weibull_min.cdf(t, beta, loc=nu, scale=alpha)
print("Probability of failure within 2500 hours:", prob_failure_2500)
print(f"Percentage of widgets failed by 2500 hours: {prob_failure_2500 * 100:.2f}%")
Mean time to failure: 1500.0 hours
Probability of failure within 2500 hours: 0.8389019121733734
Percentage of widgets failed by 2500 hours: 83.89%
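The mean obtained from the $\alpha\,\Gamma(1 + 1/\beta)$ formula can also be cross-checked (an optional sanity check, not required by the assignment) against SciPy's built-in moment calculation:

```python
from scipy.stats import weibull_min

beta, alpha = 0.5, 750
# weibull_min.mean evaluates the same moment, E[T] = α·Γ(1 + 1/β)
m = weibull_min.mean(beta, scale=alpha)
print(m)  # 1500.0
```

Both routes give 1500 hours, since $\Gamma(1 + 1/0.5) = \Gamma(3) = 2$.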
Question 5¶
Using the functions fminbnd (available in the standard MATLAB package), find the value for $x$ where the maximum of the $N(3, 1)$ probability density occurs.
Note that you have to find the minimum of $-f(x)$ to find the maximum of $f(x)$ using these functions.
Refer to the help files on these functions for more information on how to use them.
from scipy.stats import norm
from scipy.optimize import minimize_scalar
mu = 3
sigma = 1
def neg_pdf(x):
    return -norm.pdf(x, mu, sigma)
# find x that minimizes the negative PDF (== maximizes the PDF)
result = minimize_scalar(neg_pdf, bounds=(0, 6), method='bounded')
x_max = result.x
pdf_max = norm.pdf(x_max, mu, sigma)
print(f"The maximum of PDF occurs at x = {x_max:.4f}")
print(f"Maximum of PDF = {pdf_max:.4f}")
The maximum of PDF occurs at x = 3.0000
Maximum of PDF = 0.3989
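For readers following the MATLAB wording of the question, SciPy also provides fminbound, whose bounded-interval interface mirrors fminbnd more directly than minimize_scalar; a minimal sketch:

```python
from scipy.optimize import fminbound
from scipy.stats import norm

# minimize -f(x) on [0, 6] to maximize the N(3, 1) density,
# just as fminbnd would be used in MATLAB
x_opt = fminbound(lambda x: -norm.pdf(x, 3, 1), 0, 6)
print(round(x_opt, 4))
```

It converges to the same maximizer, $x = \mu = 3$, as expected for a normal density.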
Question 6¶
Generate 500 random samples from the standard normal distribution for sample sizes of $n = 2$, $15$, and $45$.
At each sample size, calculate the sample mean for all 500 samples. How are the means distributed as $n$ gets large? Look at a histogram of the sample means to help answer this question.
What is the mean and variance of the sample means for each $n$? Is this what you would expect from the Central Limit Theorem?
from scipy.stats import norm
np.random.seed(0)
ns = [2, 15, 45]
n_reps = 500
results = {}
for n in ns:
    x = np.random.randn(n, n_reps)
    xbar = x.mean(axis=0)  # sample mean of each column
    emp_mean = np.mean(xbar)
    emp_var = np.var(xbar, ddof=0)  # population variance of the sample means
    theory_mean = 0.0
    theory_var = 1.0 / n  # var(Xbar) = var(X)/n, here var(X) = 1
    results[n] = {
        "xbar": xbar,
        "emp_mean": emp_mean,
        "emp_var": emp_var,
        "theory_mean": theory_mean,
        "theory_var": theory_var
    }
    plt.figure(figsize=(5, 3))
    count, bins, _ = plt.hist(xbar, bins=20, density=True, alpha=0.6)
    # theoretical normal density with mean = 0 and std = sqrt(1/n)
    xs = np.linspace(bins[0], bins[-1], 200)
    pdf = norm.pdf(xs, loc=theory_mean, scale=np.sqrt(theory_var))
    plt.plot(xs, pdf, linewidth=2, label=f'Normal($0$, {1/n:.4f})')
    plt.title(f'Histogram of Sample Means (n={n}, reps={n_reps})')
    plt.xlabel('sample mean')
    plt.legend()
    plt.grid(True)
    plt.show()
print("empirical vs theoretical values:\n")
for n in ns:
r = results[n]
print(f"n = {n}:")
print(f" empirical mean of xbar = {r['emp_mean']:.5f}")
print(f" theoretical mean = {r['theory_mean']:.5f}")
print(f" empirical var of xbar = {r['emp_var']:.5f}")
print(f" theoretical var = {r['theory_var']:.5f}")
print()
empirical vs theoretical values:

n = 2:
  empirical mean of xbar = -0.04526
  theoretical mean = 0.00000
  empirical var of xbar = 0.46706
  theoretical var = 0.50000

n = 15:
  empirical mean of xbar = -0.01329
  theoretical mean = 0.00000
  empirical var of xbar = 0.06997
  theoretical var = 0.06667

n = 45:
  empirical mean of xbar = -0.00006
  theoretical mean = 0.00000
  empirical var of xbar = 0.02135
  theoretical var = 0.02222
For the standard normal distribution ($X\sim N(0,1)$) the sample mean $\bar X$ has theoretical mean $E[\bar X]=0$ and variance $\operatorname{Var}(\bar X)=1/n$.
The histograms above show that:
As $n$ increases, the histogram becomes more tightly concentrated around $0$ (the variance decreases) and increasingly resembles a normal curve, as the Central Limit Theorem predicts.
Numerically, the empirical means are close to $0$ and the empirical variances are close to $1/n$.
Thus the distribution of the sample means approaches a normal distribution as $n$ grows, with mean and variance converging to the values $0$ and $1/n$ given by the Central Limit Theorem.
Question 7¶
Generate a random sample that is uniformly distributed over the interval $(0, 1)$.
Plot the empirical distribution function over the interval $(-0.5, 1.5)$. There is also a function in the Statistics Toolbox called cdfplot that will do this.
np.random.seed(0)
n = 1000
x = np.random.uniform(0, 1, n)
x_sorted = np.sort(x)
y = np.arange(1, n + 1) / n
# plot empirical distribution function
plt.figure(figsize=(5,3))
plt.step(x_sorted, y, where='post', label='Empirical CDF')
plt.xlim(-0.5, 1.5)
plt.ylim(0, 1.1)
plt.xlabel('x')
plt.ylabel('F(x)')
plt.title('Empirical Distribution Function for Uniform(0,1)')
plt.grid(True)
plt.legend()
plt.show()
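One detail worth noting: plt.step only draws between the smallest and largest observations, so the flat tails $F(x) = 0$ to the left of the sample and $F(x) = 1$ to the right are not shown over $(-0.5, 1.5)$. A small padding step (a sketch, reusing the same sorted sample) makes the full empirical CDF visible before calling plt.step:

```python
import numpy as np

np.random.seed(0)
x = np.sort(np.random.uniform(0, 1, 1000))
n = len(x)

# pad with the flat tails: F(x) = 0 left of the smallest observation,
# F(x) = 1 right of the largest, so the step plot spans (-0.5, 1.5)
xs = np.concatenate(([-0.5], x, [1.5]))
ys = np.concatenate(([0.0], np.arange(1, n + 1) / n, [1.0]))

print(ys[0], ys[-1])  # 0.0 1.0
```

Passing these padded arrays to plt.step(xs, ys, where='post') reproduces what MATLAB's cdfplot draws.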
Question 8¶
Generate a random sample of size $100$ from a normal distribution with mean $10$ and variance of $2$.
Use the following:
randn(1,100)*sqrt(2)+10
Plot the empirical cumulative distribution function.
What is the value of the empirical distribution function evaluated at a point less than the smallest observation in your random sample?
What is the value of the empirical cumulative distribution function evaluated at a point that is greater than the largest observation in your random sample?
np.random.seed(0)
n = 100
x = np.random.randn(n) * np.sqrt(2) + 10
x_sorted = np.sort(x)
y = np.arange(1, n+1) / n # empirical CDF values
plt.figure(figsize=(6,3))
plt.step(x_sorted, y, where='post', label='Empirical CDF')
plt.xlim(x_sorted.min() - 1, x_sorted.max() + 1)
plt.ylim(-0.02, 1.02)
plt.xlabel('x')
plt.ylabel('F_n(x)')
plt.title('Empirical CDF for sample from N(10, var=2), n=100')
plt.grid(True)
plt.legend()
plt.show()
x_less = x_sorted.min() - 0.5
x_greater = x_sorted.max() + 0.5
edf_less = np.searchsorted(x_sorted, x_less, side='right') / n
edf_greater = np.searchsorted(x_sorted, x_greater, side='right') / n
print(f"min sample = {x_sorted.min():.4f}, max sample = {x_sorted.max():.4f}")
print(f"EDF at a point below the smallest observation => F_n({x_less:.4f}) = {edf_less:.4f}")
print(f"EDF at a point above the largest observation => F_n({x_greater:.4f}) = {edf_greater:.4f}")
min sample = 6.3895, max sample = 13.2099
EDF at a point below the smallest observation => F_n(5.8895) = 0.0000
EDF at a point above the largest observation => F_n(13.7099) = 1.0000
Question 9¶
Another measure of skewness, called the quartile coefficient of skewness, for a sample is given by
$$ \hat{\gamma}_{1q} = \frac{\hat{q}_{0.75} - 2\hat{q}_{0.5} + \hat{q}_{0.25}}{\hat{q}_{0.75} - \hat{q}_{0.25}} $$
Write a MATLAB function that returns this statistic.
def quartile_skewness(sample):
    q25 = np.percentile(sample, 25)
    q50 = np.percentile(sample, 50)
    q75 = np.percentile(sample, 75)
    gamma_1q = (q75 - 2*q50 + q25) / (q75 - q25)
    return gamma_1q
# example
data = np.random.normal(0, 1, 1000)
result = quartile_skewness(data)
print("Quartile coefficient of skewness:", result)
Quartile coefficient of skewness: 0.030248664926202085
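The near-zero value above reflects the symmetry of the normal sample. As an illustrative check of the statistic's sign (not part of the assignment), applying the same quartile formula to a right-skewed exponential sample gives a clearly positive value:

```python
import numpy as np

# for Exp(1), the population quartiles are ln(4/3), ln 2, ln 4, so the
# quartile skewness is (ln4 - 2·ln2 + ln(4/3)) / ln 3 ≈ 0.262
rng = np.random.default_rng(0)
sample = rng.exponential(size=100_000)
q25, q50, q75 = np.percentile(sample, [25, 50, 75])
gamma_1q = (q75 - 2*q50 + q25) / (q75 - q25)
print(gamma_1q)
```

With a large sample, the empirical value lands close to the population value of about 0.26.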
Question 10¶
Investigate the bias in the maximum likelihood estimate of the variance given by:
$ \hat{\sigma}^2 = \frac{1}{n}\sum_{i=1}^{n}(x_i - \bar{x})^2 $ (1)
Generate a random sample from the standard normal distribution. You can use the randn function that is available in the standard MATLAB package.
Calculate $ \hat{\sigma}^2 $ using Equation (1) and record the value in a vector. Repeat this process (generate a random sample from the standard normal distribution, estimate the variance, save the value) many times.
Once you are done with this procedure, you should have many estimates for the variance. Take the mean of these estimates to get an estimate of the expected value of $ \hat{\sigma}^2 $. How does this compare with the known value of $ \hat{\sigma}^2 = 1$?
Does this indicate that the maximum likelihood estimate for the variance is biased? What is the estimated bias from this procedure?
def simulation(n):
    M = 20000  # number of repetitions
    true_variance = 1.0
    np.random.seed(0)
    # store the estimated variances from each simulation
    mle_variances = np.zeros(M)
    # simulation
    for i in range(M):
        sample = np.random.randn(n)
        # MLE for the variance: np.var() with ddof=0 divides by n,
        # i.e. sigma_hat_sq = (1/n) * sum((x_i - x_bar)^2)
        sigma_hat_sq = np.var(sample, ddof=0)
        mle_variances[i] = sigma_hat_sq
    # expected value and bias
    estimated_expected_value = np.mean(mle_variances)
    estimated_bias = estimated_expected_value - true_variance
    estimated_variance_of_mle = np.var(mle_variances)
    # theoretical expected value: E[sigma^2_hat] = (n-1)/n * sigma^2
    theoretical_expected_value = (n - 1) / n * true_variance
    # theoretical bias: Bias(sigma^2_hat) = -(1/n) * sigma^2
    theoretical_bias = -(1 / n) * true_variance
    # theoretical variance: Var(sigma^2_hat) = 2(n-1)/n^2 * sigma^4
    theoretical_variance = (2 * (n - 1) / (n**2)) * (true_variance**2)
    print(f"Sample Size (n): {n}")
    print(f"Number of Simulations (M): {M}")
    print(f"True Variance (sigma^2): {true_variance}")
    print("-" * 59)
    print(" | Simulation Estimate | Theoretical Value |")
    print("-" * 59)
    print(f"E[sigma^2_hat] | {estimated_expected_value:<19.5f} | {theoretical_expected_value:<17.5f} |")
    print(f"Bias | {estimated_bias:<19.5f} | {theoretical_bias:<17.5f} |")
    print(f"Var(sigma^2_hat)| {estimated_variance_of_mle:<19.5f} | {theoretical_variance:<17.5f} |")
    print("-" * 59)
    print()

n = [5, 10, 15, 50, 100]  # different sample sizes
for size in n:
    simulation(size)
Sample Size (n): 5
Number of Simulations (M): 20000
True Variance (sigma^2): 1.0
-----------------------------------------------------------
| Simulation Estimate | Theoretical Value |
-----------------------------------------------------------
E[sigma^2_hat] | 0.79626 | 0.80000 |
Bias | -0.20374 | -0.20000 |
Var(sigma^2_hat)| 0.31885 | 0.32000 |
-----------------------------------------------------------
Sample Size (n): 10
Number of Simulations (M): 20000
True Variance (sigma^2): 1.0
-----------------------------------------------------------
| Simulation Estimate | Theoretical Value |
-----------------------------------------------------------
E[sigma^2_hat] | 0.89891 | 0.90000 |
Bias | -0.10109 | -0.10000 |
Var(sigma^2_hat)| 0.18013 | 0.18000 |
-----------------------------------------------------------
Sample Size (n): 15
Number of Simulations (M): 20000
True Variance (sigma^2): 1.0
-----------------------------------------------------------
| Simulation Estimate | Theoretical Value |
-----------------------------------------------------------
E[sigma^2_hat] | 0.93251 | 0.93333 |
Bias | -0.06749 | -0.06667 |
Var(sigma^2_hat)| 0.12297 | 0.12444 |
-----------------------------------------------------------
Sample Size (n): 50
Number of Simulations (M): 20000
True Variance (sigma^2): 1.0
-----------------------------------------------------------
| Simulation Estimate | Theoretical Value |
-----------------------------------------------------------
E[sigma^2_hat] | 0.98011 | 0.98000 |
Bias | -0.01989 | -0.02000 |
Var(sigma^2_hat)| 0.03885 | 0.03920 |
-----------------------------------------------------------
Sample Size (n): 100
Number of Simulations (M): 20000
True Variance (sigma^2): 1.0
-----------------------------------------------------------
| Simulation Estimate | Theoretical Value |
-----------------------------------------------------------
E[sigma^2_hat] | 0.98905 | 0.99000 |
Bias | -0.01095 | -0.01000 |
Var(sigma^2_hat)| 0.01967 | 0.01980 |
-----------------------------------------------------------
The simulation results show that the estimate of the expected value of $\hat{\sigma}^2$ underestimates the known true variance, $\sigma^2 = 1.0$.
Comparison of $E[\hat{\sigma}^2]$ to $\sigma^2 = 1.0$¶
The table below summarizes the comparison between the estimated expected value ($E[\hat{\sigma}^2]$) and the true variance ($\sigma^2=1.0$) for all tested sample sizes ($n$).
| Sample Size ($n$) | Estimated $E[\hat{\sigma}^2]$ | True $\sigma^2$ | Comparison |
|---|---|---|---|
| 5 | 0.79626 | 1.0 | $E[\hat{\sigma}^2] < \sigma^2$ |
| 10 | 0.89891 | 1.0 | $E[\hat{\sigma}^2] < \sigma^2$ |
| 15 | 0.93251 | 1.0 | $E[\hat{\sigma}^2] < \sigma^2$ |
| 50 | 0.98011 | 1.0 | $E[\hat{\sigma}^2] < \sigma^2$ |
| 100 | 0.98905 | 1.0 | $E[\hat{\sigma}^2] < \sigma^2$ |
- Underestimation (Bias): In every case, $E[\hat{\sigma}^2]$ is less than $1.0$. This difference is the negative bias of the Maximum Likelihood Estimator (MLE) for the variance.
- Asymptotic behavior: As the sample size $n$ increases, the value of $E[\hat{\sigma}^2]$ gets closer to $1.0$. This demonstrates that the estimator is asymptotically unbiased (the bias approaches zero as $n \to \infty$).
These results indicate that the Maximum Likelihood Estimate (MLE) for the variance, $\hat{\sigma}^2$, is biased: an estimator is biased when its expected value differs from the true parameter value ($E[\hat{\sigma}^2] \neq \sigma^2$), and that is exactly what the simulation shows.
- Expected value: In every case shown (from $n=5$ to $n=100$), the estimated expected value $E[\hat{\sigma}^2]$ is less than the true variance $\sigma^2 = 1.0$.
- The calculated bias ($E[\hat{\sigma}^2] - \sigma^2$) is always negative and non-zero (e.g., $-0.20374$ for $n=5$ and $-0.01095$ for $n=100$).
- The consistent underestimation shows that the MLE for the variance underestimates the true population variance on average, confirming it is a biased estimator.
Estimated Bias from Simulation¶
The estimated bias is calculated as: Bias $= E[\hat{\sigma}^2] - \sigma^2$.
| Sample Size ($n$) | Estimated $E[\hat{\sigma}^2]$ | True $\sigma^2$ | Estimated Bias | Theoretical Bias ($-\frac{1}{n}$) |
|---|---|---|---|---|
| 5 | 0.79626 | 1.0 | -0.20374 | -0.20000 |
| 10 | 0.89891 | 1.0 | -0.10109 | -0.10000 |
| 15 | 0.93251 | 1.0 | -0.06749 | -0.06667 |
| 50 | 0.98011 | 1.0 | -0.01989 | -0.02000 |
| 100 | 0.98905 | 1.0 | -0.01095 | -0.01000 |
The estimated bias is negative in every case, confirming that the Maximum Likelihood Estimator ($\hat{\sigma}^2$) underestimates the true variance. As the sample size increases, the magnitude of the estimated bias decreases, approaching the theoretical value of zero, demonstrating the estimator's consistency.
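The bias identified above is exactly what Bessel's correction removes: dividing by $n-1$ instead of $n$ gives the familiar unbiased sample variance. A short sketch comparing the two estimators on the same simulated data (using np.var's ddof argument):

```python
import numpy as np

rng = np.random.default_rng(0)
n, M = 5, 20000
samples = rng.standard_normal((M, n))

# MLE divides by n (ddof=0); Bessel's correction divides by n-1 (ddof=1)
mle = samples.var(axis=1, ddof=0).mean()       # ≈ (n-1)/n = 0.8
unbiased = samples.var(axis=1, ddof=1).mean()  # ≈ 1.0
print(mle, unbiased)
```

The corrected estimator averages close to the true variance of 1 even at $n = 5$, where the MLE is off by about 20%.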