In [1]:
import warnings
from pathlib import Path

import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.formula.api as smf
from IPython.display import HTML
from matplotlib import cm, pyplot as plt

from transforms import transforms, DummyVarsCfg

# Read the raw CSV exactly as stored on disk; warnings raised while parsing
# are left visible because the read happens outside the suppression scope.
saq_raw = pd.read_csv("../data/w2-saq.csv")

# `transforms` is a project-local helper (see the import cell). Every warning
# it emits is silenced, but only for the duration of this call: the previous
# warning filters are restored when the `with` block exits.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    df = transforms(saq_raw, drop_temp=False, dummy_cfg=None)
In [2]:
# Render one corner pair plot per (plot kind, hue column) combination, save it
# as a PNG under ./figures, and display it inline.

# Output directory for the PNGs. savefig() does not create missing
# directories, so make sure it exists before the first figure is written.
FIGURE_DIR = Path("./figures")
FIGURE_DIR.mkdir(parents=True, exist_ok=True)

# Extra keyword arguments merged into `plot_kws` for the 'kde' plot kind, used
# by the hue columns that opt in below (Carspace, bathroom, bedrooms).
noisy_kde = dict(fill=True, alpha=0.4, levels=10, thresh=0.05, bw_adjust=0.95)

# Variables shown in every grid, in plotting order.
BASE_VARS = ['hcost', 'Tincome', 'pricesold', 'intsize']

# Variables plotted through their `<name>_log` column instead of the raw one.
# NOTE(review): the `*_log` columns are presumably created by transforms();
# confirm they exist in `df`.
LOG_VARS = {'Tincome', 'pricesold', 'hcost', 'intsize'}

# (seaborn `kind`, short id used in the title / file name / override lookup,
#  default plot_kws for that kind)
PLOT_KINDS = [
    ('kde', 'kde', dict()),
    ('scatter', 's', dict(alpha=0.75)),
    ('hist', 'hist', dict()),
    ('reg', 'r1', dict(lowess=True, scatter_kws=dict(alpha=0.5))),
    ('reg', 'r2', dict(scatter_kws=dict(alpha=0.5))),
]

# (human-readable label, short id used in the file name, hue column, options)
# Options:
#   palette   - palette name handed to seaborn
#   <kind id> - optional plot_kws overrides applied only for that plot kind
#   log       - optional per-hue replacement for LOG_VARS
HUES = [
    ('Owner Occupier', 'OO', 'owneroc', dict(palette='rainbow')),
    ('Property Type', 'PT', 'PType_name', dict(palette='gist_rainbow')),
    ('Location', 'L', 'location_name', dict(palette='gist_rainbow')),
    ('Carspaces', 'CS', 'Carspace', dict(palette='rainbow', kde=noisy_kde)),
    ('Bathrooms', 'WR', 'bathroom', dict(palette='rainbow', kde=noisy_kde)),
    ('Bedrooms', 'BD', 'bedrooms', dict(palette='gist_rainbow', kde=noisy_kde)),
]

for kind, kind_id, kind_kws in PLOT_KINDS:
    for _hue_label, hue_id, hue_col, hue_cfg in HUES:
        log_vars = hue_cfg.get('log', LOG_VARS)

        g = sns.pairplot(
            df, corner=True, hue=hue_col, kind=kind, height=5,
            palette=hue_cfg['palette'],
            vars=[f"{name}_log" if name in log_vars else name for name in BASE_VARS],
            # Merge instead of dict(**a, **b): a hue-specific override may
            # repeat a key from the per-kind defaults without raising
            # TypeError; the override wins.
            plot_kws={**kind_kws, **hue_cfg.get(kind_id, {})},
        )

        g.fig.suptitle(
            f"[{kind_id}] (Group {hue_col}) Income V Pricesold V HCost V Intsize",
            fontsize=20,
        )
        # Save through the figure object rather than pyplot's implicit
        # "current figure" so the file always contains this grid.
        g.fig.savefig(FIGURE_DIR / f"4d-pairplot-{kind_id}-TI_PS_HC_IS_{hue_id}.png")
        plt.show()
        # 30 large grids are produced; release each one explicitly so memory
        # does not accumulate on backends that keep figures open after show().
        plt.close(g.fig)
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image