In [1]:
import statsmodels.formula.api as smf
import statsmodels.api as sm
from scipy.optimize import minimize, minimize_scalar
import warnings
from collections import namedtuple
from pprint import pprint
from transforms import transforms, DummyVarsCfg
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
df = pd.read_csv("../data/w2-saq.csv")
with warnings.catch_warnings():
warnings.simplefilter("ignore")
df = transforms(df, drop_temp=False, dummy_cfg=DummyVarsCfg(drop_first=False, drop_source=False))
In [2]:
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf
from scipy.optimize import minimize
from utility import expected_hcost
# Objective function: minimize the mean squared gap
def objective(params, df):
interest_rate, loan_term_years, deposit_rate, grant = params
match (interest_rate, loan_term_years, deposit_rate, grant):
case (interest_rate, _, _, _) if not (0.05 <= interest_rate <= 0.1):
return np.inf
case (_, loan_term_years, _, _) if not (1 <= loan_term_years <= 35):
return np.inf
case (_, _, deposit_rate, _) if not (0.10 <= deposit_rate <= 0.25):
return np.inf
case (_, _, _, grant) if not (15000 <= grant <= 50000):
return np.inf
expected = expected_hcost(df, interest_rate, loan_term_years, deposit_rate, grant)
actual = df['hcost']
return np.mean((expected - actual) ** 2)
# Starting values
init_params = [0.05, 30, 0.15, 15000] # interest_rate, loan_term_years, deposit_rate
# Run optimization
result = minimize(objective, init_params, args=(df,), method='Nelder-Mead')
# Extract best-fit parameters
best_interest, best_term, best_deposit, best_grant = result.x
# Compute expected_hcost with optimal parameters
df['expected_hcost'] = expected_hcost(df, best_interest, best_term, best_deposit, best_grant)
df['gap'] = df['expected_hcost'] - df['hcost']
# Optional: fit final regression
model = smf.ols('gap ~ expected_hcost', data=df).fit()
# Output results
print(f"Optimized parameters:")
print(f"- Interest rate: {best_interest:.4f}")
print(f"- Loan term: {best_term:.1f} years")
print(f"- Deposit rate: {best_deposit:.4f}")
print(f"- Grant: {best_grant:.4f}")
print(model.summary())
Optimized parameters: - Interest rate: 0.0501 - Loan term: 35.0 years - Deposit rate: 0.2500 - Grant: 21097.4823 OLS Regression Results ============================================================================== Dep. Variable: gap R-squared: 0.547 Model: OLS Adj. R-squared: 0.546 Method: Least Squares F-statistic: 360.0 Date: Tue, 22 Apr 2025 Prob (F-statistic): 3.40e-53 Time: 00:09:03 Log-Likelihood: 367.92 No. Observations: 300 AIC: -731.8 Df Residuals: 298 BIC: -724.4 Df Model: 1 Covariance Type: nonrobust ================================================================================== coef std err t P>|t| [0.025 0.975] ---------------------------------------------------------------------------------- Intercept -0.2176 0.019 -11.519 0.000 -0.255 -0.180 expected_hcost 0.8192 0.043 18.975 0.000 0.734 0.904 ============================================================================== Omnibus: 50.607 Durbin-Watson: 2.038 Prob(Omnibus): 0.000 Jarque-Bera (JB): 71.508 Skew: -1.106 Prob(JB): 2.97e-16 Kurtosis: 3.912 Cond. No. 12.4 ============================================================================== Notes: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
In [3]:
# Compute expected_hcost with optimal parameters
df['expected_hcost'] = expected_hcost(df, 0.05, 30, 0.15, 15)
df['expected_hcost_log'] = np.log(df['expected_hcost'])
df['gap'] = df['expected_hcost'] - df['hcost']
df['gap_log'] = np.log(df['gap'])
# Optional: fit final regression
model = smf.ols('gap ~ expected_hcost', data=df).fit()
/Users/angusthomsen/code/data/env/lib/python3.12/site-packages/pandas/core/arraylike.py:399: RuntimeWarning: invalid value encountered in log result = getattr(ufunc, method)(*inputs, **kwargs)
In [14]:
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.animation import FuncAnimation
from matplotlib import cm
from matplotlib.colors import Normalize
from scipy.interpolate import griddata
from IPython.display import HTML
fig, axes = plt.subplots(3, 6, subplot_kw={'projection': '3d'}, figsize=(30, 15))
for row, y_var in enumerate(['hcost', 'expected_hcost', 'gap']):
x = df['Tincome_log']
y = df[y_var]
z = df['pricesold_log']
# Define grid
xi = np.linspace(x.min(), x.max(), 50)
yi = np.linspace(y.min(), y.max(), 50)
X, Y = np.meshgrid(xi, yi)
Z = griddata((x, y), z, (X, Y), method='linear')
dZ_dx, dZ_dy = np.gradient(Z, xi, yi)
slope_magnitude = np.sqrt(dZ_dx**2 + dZ_dy**2)
norm = Normalize(vmin=np.nanmin(slope_magnitude), vmax=np.nanmax(slope_magnitude))
colors = cm.plasma(norm(slope_magnitude))
for i in range(0, 3):
axes[row][i].plot_surface(X, Y, Z, facecolors=colors, rstride=1, cstride=1, linewidth=0, antialiased=False)
axes[row][i].view_init(elev=30, azim=135 + i*45)
axes[row][i+3].plot_surface(X, Y, Z, cmap='viridis', rstride=1, cstride=1, linewidth=0, antialiased=False)
axes[row][i+3].view_init(elev=30, azim=135 + i*45)
plt.show()
plt.savefig(f"./figures/3d-hcost-gap.png")
def update(angle):
axes[0].view_init(elev=30, azim=angle)
axes[1].view_init(elev=30, azim=angle+50)
return fig,
# ani = FuncAnimation(fig, update, frames=range(0, 360, 2), interval=50)
plt.show()
# HTML(ani.to_jshtml())
<Figure size 640x480 with 0 Axes>
In [9]:
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.animation import FuncAnimation
from matplotlib import cm
from matplotlib.colors import Normalize
from scipy.interpolate import griddata
from IPython.display import HTML
x = df['Tincome_log']
y = df['hcost_log']
z = df['pricesold_log']
# Define grid
xi = np.linspace(x.min(), x.max(), 50)
yi = np.linspace(y.min(), y.max(), 50)
X, Y = np.meshgrid(xi, yi)
Z = griddata((x, y), z, (X, Y), method='linear')
dZ_dx, dZ_dy = np.gradient(Z, xi, yi) # dz/dx and dz/dy
slope_magnitude = np.sqrt(dZ_dx**2 + dZ_dy**2)
norm = Normalize(vmin=np.nanmin(slope_magnitude), vmax=np.nanmax(slope_magnitude))
colors = cm.plasma(norm(slope_magnitude))
fig, ax = plt.subplots(1, 1, subplot_kw={'projection': '3d'}, figsize=(10, 10))
ax.plot_surface(X, Y, Z, rstride=1, cstride=1, linewidth=0, antialiased=False, facecolors=colors)
def update(angle):
ax.view_init(elev=30, azim=angle)
return fig,
ani = FuncAnimation(fig, update, frames=range(0, 360, 2), interval=50)
HTML(ani.to_jshtml())
Animation size has reached 20989285 bytes, exceeding the limit of 20971520.0. If you're sure you want a larger animation embedded, set the animation.embed_limit rc parameter to a larger value (in MB). This and further frames will be dropped.
Out[9]: