In [1]:
import statsmodels.formula.api as smf
import statsmodels.api as sm
from scipy.optimize import minimize, minimize_scalar
import warnings
from collections import namedtuple
from pprint import pprint
from transforms import transforms, DummyVarsCfg
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

df = pd.read_csv("../data/w2-saq.csv")

with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    df = transforms(df, drop_temp=False, dummy_cfg=DummyVarsCfg(drop_first=False, drop_source=False))
In [2]:
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf
from scipy.optimize import minimize
from utility import expected_hcost

# Objective function: minimize the mean squared gap
def objective(params, df):
    interest_rate, loan_term_years, deposit_rate, grant = params

    match (interest_rate, loan_term_years, deposit_rate, grant):
        case (interest_rate, _, _, _) if not (0.05 <= interest_rate <= 0.1):
            return np.inf
        case (_, loan_term_years, _, _) if not (1 <= loan_term_years <= 35):
            return np.inf
        case (_, _, deposit_rate, _) if not (0.10 <= deposit_rate <= 0.25):
            return np.inf
        case (_, _, _, grant) if not (15000 <= grant <= 50000):
            return np.inf
            
    expected = expected_hcost(df, interest_rate, loan_term_years, deposit_rate, grant)
    actual = df['hcost']
    return np.mean((expected - actual) ** 2)

# Starting values
init_params = [0.05, 30, 0.15, 15000]  # interest_rate, loan_term_years, deposit_rate

# Run optimization
result = minimize(objective, init_params, args=(df,), method='Nelder-Mead')

# Extract best-fit parameters
best_interest, best_term, best_deposit, best_grant = result.x

# Compute expected_hcost with optimal parameters
df['expected_hcost'] = expected_hcost(df, best_interest, best_term, best_deposit, best_grant)
df['gap'] = df['expected_hcost'] - df['hcost']

# Optional: fit final regression
model = smf.ols('gap ~ expected_hcost', data=df).fit()

# Output results
print(f"Optimized parameters:")
print(f"- Interest rate: {best_interest:.4f}")
print(f"- Loan term: {best_term:.1f} years")
print(f"- Deposit rate: {best_deposit:.4f}")
print(f"- Grant: {best_grant:.4f}")
print(model.summary())
Optimized parameters:
- Interest rate: 0.0501
- Loan term: 35.0 years
- Deposit rate: 0.2500
- Grant: 21097.4823
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                    gap   R-squared:                       0.547
Model:                            OLS   Adj. R-squared:                  0.546
Method:                 Least Squares   F-statistic:                     360.0
Date:                Tue, 22 Apr 2025   Prob (F-statistic):           3.40e-53
Time:                        00:09:03   Log-Likelihood:                 367.92
No. Observations:                 300   AIC:                            -731.8
Df Residuals:                     298   BIC:                            -724.4
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
==================================================================================
                     coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------
Intercept         -0.2176      0.019    -11.519      0.000      -0.255      -0.180
expected_hcost     0.8192      0.043     18.975      0.000       0.734       0.904
==============================================================================
Omnibus:                       50.607   Durbin-Watson:                   2.038
Prob(Omnibus):                  0.000   Jarque-Bera (JB):               71.508
Skew:                          -1.106   Prob(JB):                     2.97e-16
Kurtosis:                       3.912   Cond. No.                         12.4
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
In [3]:
# Compute expected_hcost with optimal parameters
df['expected_hcost'] = expected_hcost(df, 0.05, 30, 0.15, 15)
df['expected_hcost_log'] = np.log(df['expected_hcost'])
df['gap'] = df['expected_hcost'] - df['hcost']
df['gap_log'] = np.log(df['gap'])

# Optional: fit final regression
model = smf.ols('gap ~ expected_hcost', data=df).fit()
/Users/angusthomsen/code/data/env/lib/python3.12/site-packages/pandas/core/arraylike.py:399: RuntimeWarning: invalid value encountered in log
  result = getattr(ufunc, method)(*inputs, **kwargs)
In [14]:
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.animation import FuncAnimation
from matplotlib import cm
from matplotlib.colors import Normalize
from scipy.interpolate import griddata
from IPython.display import HTML

fig, axes = plt.subplots(3, 6, subplot_kw={'projection': '3d'}, figsize=(30, 15))

for row, y_var in enumerate(['hcost', 'expected_hcost', 'gap']):
    x = df['Tincome_log']
    y = df[y_var]
    z = df['pricesold_log']
    
    # Define grid
    xi = np.linspace(x.min(), x.max(), 50)
    yi = np.linspace(y.min(), y.max(), 50)
    X, Y = np.meshgrid(xi, yi)
    
    Z = griddata((x, y), z, (X, Y), method='linear')
    
    dZ_dx, dZ_dy = np.gradient(Z, xi, yi) 
    slope_magnitude = np.sqrt(dZ_dx**2 + dZ_dy**2)
    norm = Normalize(vmin=np.nanmin(slope_magnitude), vmax=np.nanmax(slope_magnitude))
    colors = cm.plasma(norm(slope_magnitude))  
    
    
    for i in range(0, 3):
        axes[row][i].plot_surface(X, Y, Z, facecolors=colors, rstride=1, cstride=1, linewidth=0, antialiased=False)
        axes[row][i].view_init(elev=30, azim=135 + i*45)
        axes[row][i+3].plot_surface(X, Y, Z, cmap='viridis', rstride=1, cstride=1, linewidth=0, antialiased=False)
        axes[row][i+3].view_init(elev=30, azim=135 + i*45)

plt.show()
plt.savefig(f"./figures/3d-hcost-gap.png")


def update(angle):
    axes[0].view_init(elev=30, azim=angle)
    axes[1].view_init(elev=30, azim=angle+50)
    return fig,

# ani = FuncAnimation(fig, update, frames=range(0, 360, 2), interval=50)
plt.show()
# HTML(ani.to_jshtml())
No description has been provided for this image
<Figure size 640x480 with 0 Axes>
In [9]:
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.animation import FuncAnimation
from matplotlib import cm
from matplotlib.colors import Normalize

from scipy.interpolate import griddata
from IPython.display import HTML

x = df['Tincome_log']
y = df['hcost_log']
z = df['pricesold_log']

# Define grid
xi = np.linspace(x.min(), x.max(), 50)
yi = np.linspace(y.min(), y.max(), 50)
X, Y = np.meshgrid(xi, yi)

Z = griddata((x, y), z, (X, Y), method='linear')


dZ_dx, dZ_dy = np.gradient(Z, xi, yi)  # dz/dx and dz/dy
slope_magnitude = np.sqrt(dZ_dx**2 + dZ_dy**2)

norm = Normalize(vmin=np.nanmin(slope_magnitude), vmax=np.nanmax(slope_magnitude))
colors = cm.plasma(norm(slope_magnitude))  
fig, ax = plt.subplots(1, 1, subplot_kw={'projection': '3d'}, figsize=(10, 10))
ax.plot_surface(X, Y, Z, rstride=1, cstride=1, linewidth=0, antialiased=False, facecolors=colors)

def update(angle):
    ax.view_init(elev=30, azim=angle)
    return fig,

ani = FuncAnimation(fig, update, frames=range(0, 360, 2), interval=50)
HTML(ani.to_jshtml())
Animation size has reached 20989285 bytes, exceeding the limit of 20971520.0. If you're sure you want a larger animation embedded, set the animation.embed_limit rc parameter to a larger value (in MB). This and further frames will be dropped.
Out[9]:
No description has been provided for this image
No description has been provided for this image
In [ ]: