This page presents several additional figures that were not included in the paper but were used in the analysis and may be of interest to readers, along with the code required to create them.
%matplotlib inline
import os, sys
import numpy as np
import pandas as pd
import pysal as ps
import results as tools
import statsmodels.api as sm
import matplotlib.pyplot as plt
def _encode(cs):
    # Label a run by its city type, derived from the Yn parameter
    if cs['Yn'] == 5:
        return 'dense'
    elif cs['Yn'] == 10:
        return 'sprawl'
    else:
        return None
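The helper above classifies a run by its Yn value (5 maps to "dense", 10 to "sprawl"). A minimal, hypothetical usage sketch, applying it row-wise to a demo frame that is not part of the analysis:

demo = pd.DataFrame({'Yn': [5, 10, 7]})
demo['city'] = demo.apply(_encode, axis=1)  # ['dense', 'sprawl', None]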
inds = pd.read_csv('sim_res.csv')\
.set_index(['tau', 'prop_mix', 'rep_id', 'group'])
inds.info()
inds.head()
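As the summaries above show, the table is indexed on $\tau$, the proportion mix, the replication id, and the group, and contains, among other columns, the city type (city), the vacancy rate (vacr), the length of each run (ticks), and the segregation indices analyzed below (theil_th, isolation_ii, segregation_gsg, ellison_glaeser_egg_pop).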
The following figures are the equivalent of Figure 3 in the submitted version and describe the average number of iterations, as well as the number of draws, that the simulation performed for a given combination of population structure and value of $\tau$. There are four figures, one for each combination of city type ("dense", "sprawl") and vacancy rate.
sub = inds.xs('0.7_0.3', level='prop_mix')
sub.loc[(sub['city'] == 'dense') & (sub['vacr'] == 0.25), :].head()
for (city, vacr), sce in inds.groupby(['city', 'vacr']):
    print(city, vacr)
    out, ts = tools.build_convfreqs(sce)
for (city, vacr), sce in inds.groupby(['city', 'vacr']):
    print('####################################################')
    print(city, vacr)
    scetp = tools.build_tauplot_by_scenario(
        sce.drop(['city', 'job', 'vacr'], axis=1),
        scenarios=['0.5_0.5', '0.7_0.3',
                   '0.4_0.4_0.1_0.1',
                   '0.7_0.1_0.1_0.1',
                   '0.4_0.3_0.2_0.1',
                   '0.2_0.2_0.2_0.2_0.2'])
The same plots can be produced for the standard deviation, rather than the mean, of each measure:
for (city, vacr), sce in inds.groupby(['city', 'vacr']):
    print('####################################################')
    print(city, vacr)
    scetp = tools.build_tauplot_by_scenario(
        sce.drop(['city', 'job', 'vacr'], axis=1),
        scenarios=['0.5_0.5', '0.7_0.3',
                   '0.4_0.4_0.1_0.1',
                   '0.7_0.1_0.1_0.1',
                   '0.4_0.3_0.2_0.1',
                   '0.2_0.2_0.2_0.2_0.2'],
        fun='std')
To explore the Theil index further, we compute its mean and standard deviation over replications for the dense city with a 25% vacancy rate:
g = inds[(inds['vacr']==0.25) & (inds['city']=='dense')]['theil_th']\
        .groupby(level=['prop_mix', 'tau', 'rep_id']).first()\
        .groupby(level=['prop_mix', 'tau']).agg(['mean', 'std'])
$X$ range particular to each scenario:
tools.build_meanStd_plots(g, stdX=False)
$X$ range standardized across scenarios:
tools.build_meanStd_plots(g, stdX=True)
db = inds.reset_index()
# Dummy flagging the sprawl city
db['sprawl'] = 1
db.loc[db['city']=='dense', 'sprawl'] = 0
# Combined identifier for each group within each population mix
db['group_in_mix'] = db['prop_mix'] + '|' + db['group']
db.info()
Response surface analysis extrapolates conclusions from a simulation to cases not explicitly considered by the experiment itself. It is usually carried out by estimating a regression that measures the impact of the different simulation parameters on the outcome of interest.
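Concretely, the models estimated below take the form:

$$y = \beta_1 \tau + \beta_2 \tau^2 + \beta_3 \tau^3 + \beta_4 vacr + \beta_5 sprawl + \sum_g \gamma_g D_g + \epsilon$$

where $y$ is the (averaged) value of a given segregation index, $vacr$ is a dummy for the 25% vacancy rate, $sprawl$ flags the sprawl city, and the $D_g$ are dummies for each group-in-mix combination, with the '0.5_0.5|g0-0.50' category relabeled as the intercept.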
vari = 'mean'
means = db.groupby(['prop_mix', 'group_in_mix', 'tau', 'vacr', 'sprawl'])\
.agg({'rep_id': 'size', \
'ticks': vari, \
'ellison_glaeser_egg_pop': vari, \
'isolation_ii': vari, \
'segregation_gsg': vari, \
'theil_th': vari})\
.reset_index()
means['tau2'] = means['tau']**2
means['tau3'] = means['tau']**3
# Recode the vacancy rate as a dummy (15% -> 0, 25% -> 1)
means.loc[means['vacr']==0.15, 'vacr'] = 0
means.loc[means['vacr']==0.25, 'vacr'] = 1
Averaging over the draws of each combination of $\tau$, proportion mix, group, vacancy rate, and density setup, we can run regressions on the average (or, switching vari, the standard deviation) of each index tested. The following indices are used as dependent variables:
name_y = ['isolation_ii', 'segregation_gsg', 'ellison_glaeser_egg_pop']
Additionally, the Theil index takes a single value per run rather than one per group, so it requires a different dataset and is run separately.
rsa = {}
xs = ['tau', 'tau2', 'tau3', 'vacr', 'sprawl']
# Group-in-mix dummies; the '0.5_0.5|g0-0.50' category becomes the intercept
xd = pd.get_dummies(means['group_in_mix'], prefix='gim_')
xd = xd.rename(columns={'gim__0.5_0.5|g0-0.50': 'Intercept'})
xd['Intercept'] = 1
x = means[xs].join(xd)
for y in name_y:
ols = sm.OLS(means[y], x).fit()
rsa[y] = ols
# Theil: constant across groups, so keep one row per scenario
meanT = means.groupby(['tau', 'prop_mix', 'vacr', 'sprawl', 'rep_id'])\
             .first()\
             .reset_index()
xdt = pd.get_dummies(meanT['prop_mix'], prefix='pm_')
xdt = xdt.rename(columns={'pm__0.5_0.5': 'Intercept'})
xdt['Intercept'] = 1
xt = meanT[xs].join(xdt)
ols = sm.OLS(meanT['theil_th'], xt).fit()
rsa['theil_th'] = ols
We can obtain a summarizing table of all the models:
def starify(p):
    # Map a p-value to significance stars
    if p < 0.01:
        return '***'
    elif p < 0.05:
        return '**'
    elif p < 0.1:
        return '*'
    else:
        return ''
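For instance, starify(0.03) returns '**', while starify(0.2) returns an empty string.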
table = {}
for model in rsa:
table[(model, 'beta')] = np.round(rsa[model].params, 4)
table[(model, 'sig')] = rsa[model].pvalues.apply(starify)
table = pd.DataFrame(table)
order = ['tau', 'tau2', 'tau3', 'vacr', 'sprawl', 'Intercept'] \
+ [i for i in table.index if 'm_' in i]
final_tab = table.reindex(order).fillna('')
final_tab
f, axs = plt.subplots(1, 4, figsize=(12, 3))
axs = axs.ravel()
for y, ax in zip(name_y, axs):
betas = rsa[y].params
ax = tools.main_effect_plot(y, 'tau', betas, \
x.join(means[y]), ax=ax)
ax.set_title(y)
y = 'theil_th'
betas = rsa[y].params
ax = tools.main_effect_plot(y, 'tau', betas, \
xt.join(meanT[y]), ax=axs[-1])
ax.set_title(y)
plt.tight_layout()
plt.show()
The full models can also be printed (for online access only):
for model in rsa:
    print('\n#############################')
    print(model)
    print('#############################')
    print(rsa[model].summary2())
In this case, we include every single observation in the regression instead of the average value of each run. This should absorb some of the variance and yield more precise estimates.
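The intuition is standard: under the usual OLS assumptions, $\mathrm{Var}(\hat{\beta}) = \sigma^2 (X'X)^{-1}$, so fitting on all the individual draws rather than on one average per parameter combination mechanically tightens the estimated standard errors.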
%%time
db['tau2'] = db['tau']**2
db['tau3'] = db['tau']**3
# Recode the vacancy rate as a dummy (15% -> 0, 25% -> 1)
db.loc[db['vacr']==0.15, 'vacr'] = 0
db.loc[db['vacr']==0.25, 'vacr'] = 1
rsai = {}
xsi = ['tau', 'tau2', 'tau3', 'vacr', 'sprawl']
xdi = pd.get_dummies(db['group_in_mix'], prefix='gim_')
xdi = xdi.rename(columns={'gim__0.5_0.5|g0-0.50': 'Intercept'})
xdi['Intercept'] = 1
xi = db[xsi].join(xdi)
for y in name_y:
ols = sm.OLS(db[y], xi).fit()
rsai[y] = ols
# Theil
dbT = db.groupby(['tau', 'prop_mix', 'vacr', 'sprawl', 'rep_id'])\
.first()\
.reset_index()
xdti = pd.get_dummies(dbT['prop_mix'], prefix='pm_')
xdti = xdti.rename(columns={'pm__0.5_0.5': 'Intercept'})
xdti['Intercept'] = 1
xti = dbT[xsi].join(xdti)
ols = sm.OLS(dbT['theil_th'], xti).fit()
rsai['theil_th'] = ols
tablei = {}
for model in rsai:
tablei[(model, 'beta')] = np.round(rsai[model].params, 4)
tablei[(model, 'sig')] = rsai[model].pvalues.apply(starify)
tablei[(model, 'beta')] = pd.concat([tablei[(model, 'beta')], \
pd.Series(rsai[model].nobs, index=['N'])])
tablei = pd.DataFrame(tablei)
orderi = ['tau', 'tau2', 'tau3', 'vacr', 'sprawl', 'Intercept'] \
+ [i for i in tablei.index if 'm_' in i] + ['N']
final_tabi = tablei.reindex(orderi).fillna('')
final_tabi
f, axs = plt.subplots(1, 4, figsize=(12, 3))
axs = axs.ravel()
for y, ax in zip(name_y, axs):
betas = rsai[y].params
ax = tools.main_effect_plot(y, 'tau', betas, \
xi.join(db[y]), ax=ax)
ax.set_title(y)
y = 'theil_th'
betas = rsai[y].params
ax = tools.main_effect_plot(y, 'tau', betas, \
xti.join(dbT[y]), ax=axs[-1])
ax.set_title(y)
plt.tight_layout()
plt.show()
for model in rsai:
    print('\n#############################')
    print(model)
    print('#############################')
    print(rsai[model].summary2())