In [1]:
import statsmodels.formula.api as smf
import warnings
from IPython.display import HTML
from transforms import transforms, DummyVarsCfg
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Read the raw CSV first, outside the warning filter, so any warnings raised
# while parsing the file are still shown.
survey_raw = pd.read_csv("../data/w2-saq.csv")

# Run the project-local `transforms` step with all warnings silenced for the
# duration of the call only; the previous warning filters are restored on exit.
# NOTE(review): the blanket "ignore" also hides any new warnings from
# `transforms` — confirm this is still intended.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    df = transforms(survey_raw, drop_temp=False, dummy_cfg=None)
In [2]:
# Compare Total Income ('Tincome') against several outcome columns.
#
# For every (outcome, grouping) pair this cell produces two figures:
#   1. a 3 x `columns` grid of scatter plots (rows: all owners / investors /
#      occupiers, columns: one per level of the grouping variable), and
#   2. a pair of heatmaps of summary statistics, one per `low_income` value.
# Both figures are displayed inline and written to ./figures/.

# The x-range of the horizontal reference line is the income range of the whole
# frame; it does not depend on any loop variable, so compute it once.
mn, mx = df['Tincome'].min(), df['Tincome'].max()

# Each tuple: (outcome column, fixed y reference level or None, use log y-axis).
for y_variable, y_inflect, y_log_scale in [
    ('pricesold', None, True),
    ('intsize', 125, True),
    ('hcost', 0.34, False),
]:
    display(HTML(f"""
       <h1>Comparing <font color='blue'>Total Income</font> with <font color='red'>{y_variable}</font></h1>
    """))

    # Vertical reference lines span the full range of the outcome column.
    y_min, y_max = df[y_variable].min(), df[y_variable].max()
    y_label = f'{y_variable} (Log Scale)' if y_log_scale else y_variable

    # Each tuple: (grouping column, number of subplot columns in the grid).
    # NOTE(review): the grid is indexed by group position, so a grouping column
    # with more than `columns` distinct levels would raise IndexError — confirm
    # every grouping column here has at most 5 levels.
    for g_variable, columns in [
        ('PType_name', 5),
        ('location_name', 5),
        ('bedrooms', 5),
        ('Carspace', 5),
        ('bathroom', 5),
    ]:
        display(HTML(f"""
          <h2>
              By <font color='green'>{g_variable}</font> compare 
              <font color='blue'>Total Income</font> Vs 
              <font color='red'>{y_variable}</font>
          </h2>
        """))
        fig, axes = plt.subplots(3, columns, figsize=(30, 18))

        # One grid column per level of the grouping variable.
        for col, (n, ss1) in enumerate(df.groupby(g_variable, observed=True)):
            # One grid row per ownership subset. The trailing pair holds the two
            # income values drawn as vertical reference lines (start, end).
            panels = [
                ('Occupier & Investor', ss1, (59_000, 60_000)),
                ('Investor', ss1[ss1['owneroc'] == 0], (59_000, 60_000)),
                ('Occupier', ss1[ss1['owneroc'] == 1], (59_000, 64_000)),
            ]
            for row, (oo, ss, (ci_l, ci_h)) in enumerate(panels):
                ax = axes[row][col]
                ax.set_title(f"{oo} {g_variable}={n}")
                # Axis labels only on the outer edge of the grid.
                if row == 2:
                    ax.set_xlabel('Total Income (Log Scale)')
                if col == 0:
                    ax.set_ylabel(y_label)

                # Split the subset at the start threshold: at-or-below vs above.
                ssl, ssu = ss[(ss['Tincome'] <= ci_l)], ss[(ss['Tincome'] > ci_l)]

                ax.plot([ci_h, ci_h], [y_min, y_max], color='b', linestyle=(0, (1, 2)), label='X Inflection End')
                ax.plot([ci_l, ci_l], [y_min, y_max], color='r', linestyle=(0, (2, 1)), label='X Inflection Start')

                if y_inflect is None:
                    # No fixed level given: use the largest outcome value among
                    # rows of this subset whose income is below the end threshold.
                    y_max_stress = ss[ss['Tincome'] < ci_h][y_variable].max()
                    y_inflect_coors = [y_max_stress, y_max_stress]
                else:
                    y_inflect_coors = [y_inflect, y_inflect]
                ax.plot([mn, mx], y_inflect_coors, color='y', linestyle=(0, (2, 1)), label='Y Inflection')

                plot_kwargs = { 'alpha': 0.5, 'marker': 'o' }
                ax.scatter(ssl['Tincome'], ssl[y_variable], c='r', **plot_kwargs)
                ax.scatter(ssu['Tincome'], ssu[y_variable], c='g', **plot_kwargs)
                ax.legend(loc='upper right')
                ax.set_xscale('log')
                if y_log_scale:
                    ax.set_yscale('log')

        fig.suptitle(f"Income V {y_variable} by {g_variable}", fontsize=16)
        # The file name must include the outcome variable; previously it was
        # hard-coded to "hcost", so the three outcomes overwrote one another.
        fig.savefig(f"./figures/income-v-{y_variable}-by-oo-{g_variable}.png")
        plt.show()
        # Release the figure explicitly; this cell creates 30 figures in total.
        plt.close(fig)

        display(HTML(f"""
          <h2>
              <font color='green'>{g_variable}</font> Heatmap of <font color='red'>{y_variable}</font>
          </h2>
        """))

        # (output column name, aggregation) pairs passed to GroupBy.agg.
        aggregates = [
            ('var', 'var'),
            ('mean', 'mean'),
            ('count', 'count'),
            ('p5', lambda x: x.quantile(0.05)),
            ('median', 'median'),
            ('p95', lambda x: x.quantile(0.95))
        ]

        # Summary over the three-way grouping; its per-column min/max provide the
        # normalisation range used for both heatmaps below.
        m_summary = df.groupby(by=['owneroc_name', g_variable, 'low_income'], observed=True)[y_variable].agg(aggregates)

        # NOTE(review): two axes are created, so this assumes `low_income` has at
        # most two distinct values — confirm.
        fig, axes = plt.subplots(1, 2, figsize=(20, 5))
        for i, (low_income, sdf) in enumerate(df.groupby('low_income', observed=True)):
            summary = sdf.groupby(by=['owneroc_name', g_variable], observed=True)[y_variable].agg(aggregates)
            # Min-max scale each statistic column for colouring; the cell
            # annotations still show the raw `summary` values.
            normalized = (summary - m_summary.min()) / (m_summary.max() - m_summary.min())
            # Flatten the count column to the table's smallest normalised value
            # so counts are annotated but do not contribute colour variation.
            normalized['count'] = normalized.min().min()
            sns.heatmap(normalized, annot=summary, cmap='viridis', ax=axes[i])
            axes[i].set_title('low income' if low_income else 'higher income')
        fig.savefig(f"./figures/income-v-{y_variable}-by-oo-{g_variable}-heatmap.png")
        plt.show()
        plt.close(fig)

Comparing Total Income with pricesold

By PType_name, compare Total Income Vs pricesold

No description has been provided for this image

PType_name Heatmap, of pricesold

No description has been provided for this image

By location_name, compare Total Income Vs pricesold

No description has been provided for this image

location_name Heatmap, of pricesold

No description has been provided for this image

By bedrooms, compare Total Income Vs pricesold

No description has been provided for this image

bedrooms Heatmap, of pricesold

No description has been provided for this image

By Carspace, compare Total Income Vs pricesold

No description has been provided for this image

Carspace Heatmap, of pricesold

No description has been provided for this image

By bathroom, compare Total Income Vs pricesold

No description has been provided for this image

bathroom Heatmap, of pricesold

No description has been provided for this image

Comparing Total Income with intsize

By PType_name, compare Total Income Vs intsize

No description has been provided for this image

PType_name Heatmap, of intsize

No description has been provided for this image

By location_name, compare Total Income Vs intsize

No description has been provided for this image

location_name Heatmap, of intsize

No description has been provided for this image

By bedrooms, compare Total Income Vs intsize

No description has been provided for this image

bedrooms Heatmap, of intsize

No description has been provided for this image

By Carspace, compare Total Income Vs intsize

No description has been provided for this image

Carspace Heatmap, of intsize

No description has been provided for this image

By bathroom, compare Total Income Vs intsize

No description has been provided for this image

bathroom Heatmap, of intsize

No description has been provided for this image

Comparing Total Income with hcost

By PType_name, compare Total Income Vs hcost

No description has been provided for this image

PType_name Heatmap, of hcost

No description has been provided for this image

By location_name, compare Total Income Vs hcost

No description has been provided for this image

location_name Heatmap, of hcost

No description has been provided for this image

By bedrooms, compare Total Income Vs hcost

No description has been provided for this image

bedrooms Heatmap, of hcost

No description has been provided for this image

By Carspace, compare Total Income Vs hcost

No description has been provided for this image

Carspace Heatmap, of hcost

No description has been provided for this image

By bathroom, compare Total Income Vs hcost

No description has been provided for this image

bathroom Heatmap, of hcost

No description has been provided for this image
In [ ]: