In [1]:
import os
import warnings

import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.formula.api as smf
from IPython.display import HTML
from matplotlib import cm, pyplot as plt

from transforms import transforms, DummyVarsCfg
# Read the raw CSV; the path is relative to the notebook's working directory.
df = pd.read_csv("../data/w2-saq.csv")

# Every warning raised while the project-local `transforms` step runs is
# silenced; the previous warning filters are restored when the block exits.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    # `drop_temp` and `dummy_cfg` are forwarded unchanged to `transforms`.
    # NOTE(review): presumably drop_temp=False keeps intermediate columns and
    # dummy_cfg=None skips dummy-variable encoding -- confirm in transforms.py.
    df = df.pipe(transforms, drop_temp=False, dummy_cfg=None)
In [2]:
# Pair-plot sweep: draws one seaborn pair plot for every combination of plot
# kind and hue column, saves each one as a PNG under ./figures, shows it, and
# then closes it so the figures do not accumulate in memory.

# Extra KDE keyword arguments, applied only for the hue columns that opt in
# through a `kde=` entry in their config below.
noisy_kde = dict(fill=True, alpha=0.4, levels=10, thresh=0.05, bw_adjust=0.95)

# Variables that are plotted from their `<name>_log` column instead of the raw
# column. The same set object is shared by every hue config; it is only read.
# NOTE(review): the `*_log` columns are assumed to already exist in `df`
# (presumably created by `transforms`) -- confirm.
log_vars = {'Tincome', 'pricesold', 'hcost', 'intsize'}

# Base variable names, in the order they appear along the pair-plot axes.
pair_vars = ('hcost', 'Tincome', 'pricesold', 'intsize')

# (seaborn `kind`, short tag, base plot keyword arguments).
# The short tag is used in the title and file name, and is also the key under
# which a hue config may supply kind-specific overrides.
plot_kinds = [
    ('kde', 'kde', dict()),
    ('scatter', 's', dict(alpha=0.75)),
    ('hist', 'hist', dict()),
    ('reg', 'r1', dict(lowess=True, scatter_kws=dict(alpha=0.5))),
    ('reg', 'r2', dict(scatter_kws=dict(alpha=0.5))),
]

# (label, short tag, hue column in `df`, per-hue settings).
# Per-hue settings: `log` = variables to plot on their log column,
# `pallet` = palette name handed to seaborn, plus optional overrides keyed by
# a plot-kind tag (currently only `kde`).
hue_configs = [
    ('Owner Occupier', 'OO', 'owneroc', dict(
        log=log_vars,
        pallet='rainbow',
    )),
    ('Property Type', 'PT', 'PType_name', dict(
        log=log_vars,
        pallet='gist_rainbow',
    )),
    ('Location', 'L', 'location_name', dict(
        log=log_vars,
        pallet='gist_rainbow',
    )),
    ('Carspaces', 'CS', 'Carspace', dict(
        pallet='rainbow',
        log=log_vars,
        kde=noisy_kde,
    )),
    ('Bathrooms', 'WR', 'bathroom', dict(
        kde=noisy_kde,
        log=log_vars,
        pallet='rainbow',
    )),
    ('Bedrooms', 'BD', 'bedrooms', dict(
        pallet='gist_rainbow',
        log=log_vars,
        kde=noisy_kde,
    )),
]

# savefig does not create missing directories, so make sure the target exists
# before the first figure is written (no-op when it is already there).
os.makedirs("./figures", exist_ok=True)

for k, ki, kwargs in plot_kinds:
    # NOTE(review): `hl` (the human-readable label) is unpacked but not used;
    # the title below shows the raw column name `h`. Left as-is on purpose.
    for hl, hi, h, h_kwargs in hue_configs:
        g = sns.pairplot(
            df, corner=True, hue=h, kind=k, height=5,
            palette=h_kwargs['pallet'],
            vars=[
                f"{name}_log" if name in h_kwargs['log'] else name
                for name in pair_vars
            ],
            # Per-hue overrides for this plot kind are layered on top of the
            # base arguments; on a duplicate key the override wins rather
            # than raising a TypeError.
            plot_kws={**kwargs, **h_kwargs.get(ki, {})},
        )
        g.fig.suptitle(f"[{ki}] (Group {h}) Income V Pricesold V HCost V Intsize", fontsize=20)
        # Save through the grid's own figure instead of relying on whichever
        # figure pyplot currently considers active.
        g.fig.savefig(f"./figures/4d-pairplot-{ki}-TI_PS_HC_IS_{hi}.png")
        plt.show()
        # Release the figure explicitly: up to 30 large figures are created
        # here and not every backend closes them on show().
        plt.close(g.fig)