8. Conditional Manatees
# Install packages that are not installed in colab
try:
    import google.colab

    %pip install -q watermark
    %pip install git+https://github.com/ksachdeva/rethinking-tensorflow-probability.git
except:
    pass
%load_ext watermark
# Core
import numpy as np
import arviz as az
import xarray as xr
import tensorflow as tf
import tensorflow_probability as tfp
# visualization
import matplotlib.pyplot as plt
from rethinking.data import RethinkingDataset
from rethinking.data import dataframe_to_tensors
from rethinking.mcmc import sample_posterior
# aliases
tfd = tfp.distributions
tfb = tfp.bijectors
Root = tfd.JointDistributionCoroutine.Root
%watermark -p numpy,tensorflow,tensorflow_probability,arviz,scipy,pandas
numpy : 1.21.5
tensorflow : 2.7.0
tensorflow_probability: 0.15.0
arviz : 0.11.4
scipy : 1.7.3
pandas : 1.3.5
# config of various plotting libraries
%config InlineBackend.figure_format = 'retina'
8.1 Building an interaction¶
8.1.1 Making a rugged model¶
Code 8.1¶
Every inference is conditional on something, whether we notice it or not.
So far in the book, each predictor variable has been assumed to have an independent association with the mean of the outcome. What if we want to allow the association to be conditional?
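As a toy illustration of what "conditional" means here (made-up numbers, not the book's code): with one intercept and one slope, the line is the same for every group, while indexing both parameters by a group variable makes the association conditional, which is what tf.gather implements later in this chapter.
import numpy as np

x = np.array([0.1, 0.4, 0.7])  # a predictor
cid = np.array([0, 1, 0])  # hypothetical group index for each observation

# unconditional: one intercept, one slope shared by every group
a, b = 1.0, 0.2
mu_unconditional = a + b * x

# conditional: group-indexed intercepts AND slopes (an interaction)
a_g = np.array([0.9, 1.05])
b_g = np.array([0.15, -0.15])
mu_conditional = a_g[cid] + b_g[cid] * x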
d = RethinkingDataset.Rugged.get_dataset()
# make log version of outcome
d["log_gdp"] = d["rgdppc_2000"].pipe(np.log)
# extract countries with GDP data
dd = d[d["rgdppc_2000"].notnull()].copy()
# rescale variables
dd["log_gdp_std"] = dd.log_gdp / dd.log_gdp.mean()
dd["rugged_std"] = dd.rugged / dd.rugged.max()
# split countries into Africa and not-Africa
d_A1 = dd[dd["cont_africa"] == 1] # Africa
d_A0 = dd[dd["cont_africa"] == 0] # not Africa
Note - terrain ruggedness is divided by the maximum value observed, so rugged_std = 1 marks the most rugged country in the sample.
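A quick sanity check of the rescaling (a small addition, not in the book's code):
# rugged_std has maximum 1.0 and log_gdp_std has mean 1.0 by construction
print(dd["rugged_std"].min(), dd["rugged_std"].max())
print(dd["log_gdp_std"].mean())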
Code 8.2¶
def model_8_1(rugged_std):
    def _generator():
        alpha = yield Root(
            tfd.Sample(tfd.Normal(loc=1.0, scale=1.0, name="alpha"), sample_shape=1)
        )
        beta = yield Root(
            tfd.Sample(tfd.Normal(loc=0.0, scale=1.0, name="beta"), sample_shape=1)
        )
        sigma = yield Root(
            tfd.Sample(tfd.Exponential(rate=1.0, name="sigma"), sample_shape=1)
        )
        mu = alpha + beta * (rugged_std - 0.215)
        log_gdp_std = yield tfd.Independent(
            tfd.Normal(loc=mu, scale=sigma), reinterpreted_batch_ndims=1
        )

    return tfd.JointDistributionCoroutine(_generator, validate_args=False)
Code 8.3¶
jdc_8_1 = model_8_1(rugged_std=dd["rugged_std"].values)
sample_alpha, sample_beta, sample_sigma, _ = jdc_8_1.sample(1000)
# set up the plot dimensions
plt.subplot(xlim=(0, 1), ylim=(0.5, 1.5), xlabel="ruggedness", ylabel="log GDP")
plt.gca().axhline(dd.log_gdp_std.min(), ls="--")
plt.gca().axhline(dd.log_gdp_std.max(), ls="--")
# draw 50 lines from the prior
rugged_seq = np.linspace(-0.1, 1.1, num=30)
jdc_8_1_test = model_8_1(rugged_std=rugged_seq)
ds, _ = jdc_8_1_test.sample_distributions(
value=[sample_alpha, sample_beta, sample_sigma, None]
)
mu = ds[-1].distribution.loc
for i in range(50):
    plt.plot(rugged_seq, mu[i], "k", alpha=0.3)
Code 8.4¶
np.sum(np.abs(sample_beta) > 0.6) / sample_beta.shape[0]
0.56
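Note - more than half of the prior slopes are steeper than 0.6 in absolute value, which is stronger than the maximum plausible association implied by the observed range of the outcome. That is why the priors are tightened in Code 8.5.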
Code 8.5¶
tdf = dataframe_to_tensors("Rugged", dd, ["rugged_std", "log_gdp_std"])
def model_8_1(rugged_std):
    def _generator():
        alpha = yield Root(
            tfd.Sample(tfd.Normal(loc=1.0, scale=0.1, name="alpha"), sample_shape=1)
        )
        beta = yield Root(
            tfd.Sample(tfd.Normal(loc=0.0, scale=0.3, name="beta"), sample_shape=1)
        )
        sigma = yield Root(
            tfd.Sample(tfd.Exponential(rate=1.0, name="sigma"), sample_shape=1)
        )
        mu = alpha[..., tf.newaxis] + beta[..., tf.newaxis] * (rugged_std - 0.215)
        scale = sigma[..., tf.newaxis]
        log_gdp_std = yield tfd.Independent(
            tfd.Normal(loc=mu, scale=scale), reinterpreted_batch_ndims=1
        )

    return tfd.JointDistributionCoroutine(_generator, validate_args=False)
jdc_8_1 = model_8_1(tdf.rugged_std)
sample_alpha, sample_beta, sample_sigma, _ = jdc_8_1.sample(1000)
# set up the plot dimensions
plt.subplot(xlim=(0, 1), ylim=(0.5, 1.5), xlabel="ruggedness", ylabel="log GDP")
plt.gca().axhline(dd.log_gdp_std.min(), ls="--")
plt.gca().axhline(dd.log_gdp_std.max(), ls="--")
# draw 50 lines from the prior
rugged_seq = np.linspace(-0.1, 1.1, num=30)
jdc_8_1_test = model_8_1(rugged_std=rugged_seq)
ds, _ = jdc_8_1_test.sample_distributions(
value=[sample_alpha, sample_beta, sample_sigma, None]
)
mu = tf.squeeze(ds[-1].distribution.loc)
for i in range(50):
    plt.plot(rugged_seq, mu[i], "k", alpha=0.3)
Code 8.6¶
NUM_CHAINS_FOR_8_1 = 2
init_state = [
tf.ones([NUM_CHAINS_FOR_8_1]),
tf.zeros([NUM_CHAINS_FOR_8_1]),
tf.ones([NUM_CHAINS_FOR_8_1]),
]
bijectors = [
tfb.Identity(),
tfb.Identity(),
tfb.Exp(),
]
posterior_8_1, trace_8_1 = sample_posterior(
jdc_8_1,
observed_data=(tdf.log_gdp_std,),
params=["alpha", "beta", "sigma"],
init_state=init_state,
bijectors=bijectors,
)
az.summary(trace_8_1, hdi_prob=0.89)
| | mean | sd | hdi_5.5% | hdi_94.5% | mcse_mean | mcse_sd | ess_bulk | ess_tail | r_hat |
|---|---|---|---|---|---|---|---|---|---|
| alpha | 1.000 | 0.010 | 0.984 | 1.018 | 0.000 | 0.000 | 1028.0 | 433.0 | 1.00 |
| beta | 0.002 | 0.057 | -0.078 | 0.093 | 0.003 | 0.002 | 278.0 | 416.0 | 1.02 |
| sigma | 0.138 | 0.008 | 0.125 | 0.149 | 0.000 | 0.000 | 372.0 | 288.0 | 1.00 |
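Note - the posterior for beta straddles zero: in this model, ruggedness shows essentially no overall association with log GDP.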
8.1.2 Adding an indicator variable isn’t enough¶
Code 8.7¶
# make variable to index Africa (0) or not (1)
dd["cid"] = np.where(dd.cont_africa.values == 1, 0, 1)
dd["cid"]
2 0
4 1
7 1
8 1
9 1
..
229 1
230 1
231 0
232 0
233 0
Name: cid, Length: 170, dtype: int64
Code 8.8¶
tdf = dataframe_to_tensors(
"Rugged", dd, {"rugged_std": tf.float32, "log_gdp_std": tf.float32, "cid": tf.int32}
)
def model_8_2(cid, rugged_std):
    def _generator():
        alpha = yield Root(
            tfd.Sample(tfd.Normal(loc=1.0, scale=0.1, name="alpha"), sample_shape=2)
        )
        beta = yield Root(
            tfd.Sample(tfd.Normal(loc=0.0, scale=0.3, name="beta"), sample_shape=1)
        )
        sigma = yield Root(
            tfd.Sample(tfd.Exponential(rate=1.0, name="sigma"), sample_shape=1)
        )
        mu = tf.gather(alpha, cid, axis=-1) + beta[..., tf.newaxis] * (
            rugged_std - 0.215
        )
        scale = sigma[..., tf.newaxis]
        log_gdp_std = yield tfd.Independent(
            tfd.Normal(loc=mu, scale=scale), reinterpreted_batch_ndims=1
        )

    return tfd.JointDistributionCoroutine(_generator, validate_args=False)
jdc_8_2 = model_8_2(tdf.cid, rugged_std=tdf.rugged_std)
NUMBER_OF_CHAINS = 2
init_state = [
tf.ones([NUMBER_OF_CHAINS, 2]),
tf.zeros([NUMBER_OF_CHAINS]),
tf.ones([NUMBER_OF_CHAINS]),
]
bijectors = [
tfb.Identity(),
tfb.Identity(),
tfb.Exp(),
]
posterior_8_2, trace_8_2 = sample_posterior(
jdc_8_2,
observed_data=(tdf.log_gdp_std,),
params=["alpha", "beta", "sigma"],
init_state=init_state,
bijectors=bijectors,
)
az.summary(trace_8_2, hdi_prob=0.89)
| | mean | sd | hdi_5.5% | hdi_94.5% | mcse_mean | mcse_sd | ess_bulk | ess_tail | r_hat |
|---|---|---|---|---|---|---|---|---|---|
| alpha[0] | 0.881 | 0.016 | 0.852 | 0.905 | 0.000 | 0.000 | 1226.0 | 782.0 | 1.0 |
| alpha[1] | 1.050 | 0.010 | 1.035 | 1.066 | 0.001 | 0.000 | 285.0 | 268.0 | 1.0 |
| beta | -0.047 | 0.048 | -0.136 | 0.019 | 0.002 | 0.002 | 479.0 | 459.0 | 1.0 |
| sigma | 0.114 | 0.007 | 0.104 | 0.125 | 0.000 | 0.000 | 291.0 | 365.0 | 1.0 |
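Note - the indicator only shifts the intercept: alpha[0] (Africa) is clearly below alpha[1] (not Africa), but the slope beta is shared, so the association between ruggedness and log GDP is still forced to be the same on both continents.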
Code 8.9¶
def compute_and_store_log_likelihood_for_model_8_1():
    sample_alpha = posterior_8_1["alpha"]
    sample_beta = posterior_8_1["beta"]
    sample_sigma = posterior_8_1["sigma"]
    ds, _ = jdc_8_1.sample_distributions(
        value=[sample_alpha, sample_beta, sample_sigma, None]
    )
    log_likelihood_8_1 = ds[-1].distribution.log_prob(tdf.log_gdp_std).numpy()
    # we need to insert this in the sample_stats
    sample_stats_8_1 = trace_8_1.sample_stats
    coords = [
        sample_stats_8_1.coords["chain"],
        sample_stats_8_1.coords["draw"],
        np.arange(170),
    ]
    sample_stats_8_1["log_likelihood"] = xr.DataArray(
        log_likelihood_8_1,
        coords=coords,
        dims=["chain", "draw", "log_likelihood_dim_0"],
    )
compute_and_store_log_likelihood_for_model_8_1()
# We need to first compute the log likelihoods so as to be able to compare them
def compute_and_store_log_likelihood_for_model_8_2():
    sample_alpha = posterior_8_2["alpha"]
    sample_beta = posterior_8_2["beta"]
    sample_sigma = posterior_8_2["sigma"]
    ds, _ = jdc_8_2.sample_distributions(
        value=[sample_alpha, sample_beta, sample_sigma, None]
    )
    log_likelihood_8_2 = ds[-1].distribution.log_prob(tdf.log_gdp_std).numpy()
    # we need to insert this in the sample_stats
    sample_stats_8_2 = trace_8_2.sample_stats
    coords = [
        sample_stats_8_2.coords["chain"],
        sample_stats_8_2.coords["draw"],
        np.arange(170),
    ]
    sample_stats_8_2["log_likelihood"] = xr.DataArray(
        log_likelihood_8_2,
        coords=coords,
        dims=["chain", "draw", "log_likelihood_dim_0"],
    )
compute_and_store_log_likelihood_for_model_8_2()
az.compare({"m8.1": trace_8_1, "m8.2": trace_8_2})
| | rank | loo | p_loo | d_loo | weight | se | dse | warning | loo_scale |
|---|---|---|---|---|---|---|---|---|---|
| m8.2 | 0 | 125.968312 | 4.329834 | 0.000000 | 0.964731 | 7.448146 | 0.000000 | False | log |
| m8.1 | 1 | 94.428626 | 2.590254 | 31.539686 | 0.035269 | 6.506622 | 7.386764 | False | log |
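Note - virtually all of the model weight goes to m8.2, and the difference in expected predictive accuracy (d_loo ≈ 31.5, dse ≈ 7.4) is large relative to its standard error, so the continent intercept clearly helps, even though it says nothing different about ruggedness.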
Code 8.10¶
az.summary(trace_8_2, hdi_prob=0.89)
| | mean | sd | hdi_5.5% | hdi_94.5% | mcse_mean | mcse_sd | ess_bulk | ess_tail | r_hat |
|---|---|---|---|---|---|---|---|---|---|
| alpha[0] | 0.881 | 0.016 | 0.852 | 0.905 | 0.000 | 0.000 | 1226.0 | 782.0 | 1.0 |
| alpha[1] | 1.050 | 0.010 | 1.035 | 1.066 | 0.001 | 0.000 | 285.0 | 268.0 | 1.0 |
| beta | -0.047 | 0.048 | -0.136 | 0.019 | 0.002 | 0.002 | 479.0 | 459.0 | 1.0 |
| sigma | 0.114 | 0.007 | 0.104 | 0.125 | 0.000 | 0.000 | 291.0 | 365.0 | 1.0 |
Code 8.11¶
diff_a = trace_8_2.posterior["alpha"][:, :, 0] - trace_8_2.posterior["alpha"][:, :, 1]
trace_8_2.posterior["diff_a"] = diff_a
az.summary(trace_8_2, hdi_prob=0.89)
| | mean | sd | hdi_5.5% | hdi_94.5% | mcse_mean | mcse_sd | ess_bulk | ess_tail | r_hat |
|---|---|---|---|---|---|---|---|---|---|
| alpha[0] | 0.881 | 0.016 | 0.852 | 0.905 | 0.000 | 0.000 | 1226.0 | 782.0 | 1.0 |
| alpha[1] | 1.050 | 0.010 | 1.035 | 1.066 | 0.001 | 0.000 | 285.0 | 268.0 | 1.0 |
| beta | -0.047 | 0.048 | -0.136 | 0.019 | 0.002 | 0.002 | 479.0 | 459.0 | 1.0 |
| sigma | 0.114 | 0.007 | 0.104 | 0.125 | 0.000 | 0.000 | 291.0 | 365.0 | 1.0 |
| diff_a | -0.169 | 0.019 | -0.196 | -0.135 | 0.001 | 0.001 | 639.0 | 624.0 | 1.0 |
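Note - the contrast diff_a is reliably negative: expected log GDP (as a proportion of the mean) is about 0.17 lower for African nations.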
Code 8.12¶
rugged_seq = np.linspace(start=-0.1, stop=1.1, num=30)
sample_alpha = posterior_8_2["alpha"][0]
sample_beta = posterior_8_2["beta"][0]
sample_sigma = posterior_8_2["sigma"][0]
# compute mu over samples, fixing cid=1
jdc_8_2_test_cid_1 = model_8_2(np.repeat(1, 30), rugged_std=rugged_seq)
ds, samples = jdc_8_2_test_cid_1.sample_distributions(
value=[sample_alpha, sample_beta, sample_sigma, None]
)
mu_NotAfrica = tf.squeeze(ds[-1].distribution.loc)
mu_NotAfrica_mu = tf.reduce_mean(mu_NotAfrica, 0)
mu_NotAfrica_ci = tfp.stats.percentile(mu_NotAfrica, q=(1.5, 98.5), axis=0)
# compute mu over samples, fixing cid=0
jdc_8_2_test_cid_0 = model_8_2(np.repeat(0, 30), rugged_std=rugged_seq)
ds, samples = jdc_8_2_test_cid_0.sample_distributions(
value=[sample_alpha, sample_beta, sample_sigma, None]
)
mu_Africa = tf.squeeze(ds[-1].distribution.loc)
mu_Africa_mu = tf.reduce_mean(mu_Africa, 0)
mu_Africa_ci = tfp.stats.percentile(mu_Africa, q=(1.5, 98.5), axis=0)
# draw figure 8.4 in the book [note - no code was provided for it in the book]
plt.xlim(0.0, 1.0)
plt.scatter(
dd.rugged_std,
dd.log_gdp_std,
edgecolors=["none" if i == 0 else "b" for i in dd["cid"]],
facecolors=["none" if i == 1 else "b" for i in dd["cid"]],
)
# draw MAP line
plt.plot(rugged_seq, mu_Africa_mu, "b", label="Africa")
plt.fill_between(rugged_seq, mu_Africa_ci[0], mu_Africa_ci[1], color="b", alpha=0.2)
plt.plot(rugged_seq, mu_NotAfrica_mu, "k", label="Not Africa")
plt.fill_between(
rugged_seq, mu_NotAfrica_ci[0], mu_NotAfrica_ci[1], color="k", alpha=0.2
)
plt.legend();
8.1.3 Adding an interaction does work¶
Code 8.13¶
def model_8_3(cid, rugged_std):
    def _generator():
        alpha = yield Root(
            tfd.Sample(tfd.Normal(loc=1.0, scale=0.1, name="alpha"), sample_shape=2)
        )
        beta = yield Root(
            tfd.Sample(tfd.Normal(loc=0.0, scale=0.3, name="beta"), sample_shape=2)
        )
        sigma = yield Root(
            tfd.Sample(tfd.Exponential(rate=1.0, name="sigma"), sample_shape=1)
        )
        mu = tf.gather(alpha, cid, axis=-1) + tf.gather(beta, cid, axis=-1) * (
            rugged_std - 0.215
        )
        scale = sigma[..., tf.newaxis]
        log_gdp_std = yield tfd.Independent(
            tfd.Normal(loc=mu, scale=scale), reinterpreted_batch_ndims=1
        )

    return tfd.JointDistributionCoroutine(_generator, validate_args=False)
jdc_8_3 = model_8_3(tdf.cid, rugged_std=tdf.rugged_std)
init_state = [
tf.ones([NUMBER_OF_CHAINS, 2]),
tf.zeros([NUMBER_OF_CHAINS, 2]),
tf.ones([NUMBER_OF_CHAINS]),
]
bijectors = [
tfb.Identity(),
tfb.Identity(),
tfb.Exp(),
]
posterior_8_3, trace_8_3 = sample_posterior(
jdc_8_3,
observed_data=(tdf.log_gdp_std,),
params=["alpha", "beta", "sigma"],
num_samples=1000,
init_state=init_state,
bijectors=bijectors,
)
Code 8.14¶
az.summary(trace_8_3, hdi_prob=0.89)
| | mean | sd | hdi_5.5% | hdi_94.5% | mcse_mean | mcse_sd | ess_bulk | ess_tail | r_hat |
|---|---|---|---|---|---|---|---|---|---|
| alpha[0] | 0.886 | 0.016 | 0.861 | 0.911 | 0.000 | 0.000 | 2375.0 | 1378.0 | 1.00 |
| alpha[1] | 1.050 | 0.010 | 1.036 | 1.067 | 0.000 | 0.000 | 431.0 | 587.0 | 1.01 |
| beta[0] | 0.133 | 0.079 | 0.008 | 0.256 | 0.005 | 0.004 | 239.0 | 464.0 | 1.01 |
| beta[1] | -0.144 | 0.055 | -0.230 | -0.062 | 0.002 | 0.002 | 582.0 | 892.0 | 1.00 |
| sigma | 0.111 | 0.006 | 0.101 | 0.120 | 0.000 | 0.000 | 522.0 | 614.0 | 1.01 |
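Note - the slope now reverses sign between continents: beta[0] (Africa) is reliably positive while beta[1] (not Africa) is reliably negative. This is exactly the interaction that the indicator-only model m8.2 could not express.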
Code 8.15¶
# We need to first compute the log likelihoods so as to be able to compare them
def compute_and_store_log_likelihood_for_model_8_3():
    sample_alpha = posterior_8_3["alpha"]
    sample_beta = posterior_8_3["beta"]
    sample_sigma = posterior_8_3["sigma"]
    ds, _ = jdc_8_3.sample_distributions(
        value=[sample_alpha, sample_beta, sample_sigma, None]
    )
    log_likelihood_8_3 = ds[-1].distribution.log_prob(tdf.log_gdp_std).numpy()
    # we need to insert this in the sample_stats
    sample_stats_8_3 = trace_8_3.sample_stats
    coords = [
        sample_stats_8_3.coords["chain"],
        sample_stats_8_3.coords["draw"],
        np.arange(170),
    ]
    sample_stats_8_3["log_likelihood"] = xr.DataArray(
        log_likelihood_8_3,
        coords=coords,
        dims=["chain", "draw", "log_likelihood_dim_0"],
    )
compute_and_store_log_likelihood_for_model_8_3()
az.compare({"m8.1": trace_8_1, "m8.2": trace_8_2, "m8.3": trace_8_3})
| | rank | loo | p_loo | d_loo | weight | se | dse | warning | loo_scale |
|---|---|---|---|---|---|---|---|---|---|
| m8.3 | 0 | 129.606079 | 5.002433 | 0.000000 | 8.738141e-01 | 7.358786 | 0.000000 | False | log |
| m8.2 | 1 | 125.968900 | 4.329246 | 3.637179 | 1.261859e-01 | 7.448509 | 3.310409 | False | log |
| m8.1 | 2 | 94.428626 | 2.590254 | 35.177453 | 4.440892e-16 | 6.506622 | 7.503596 | False | log |
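Note - m8.3 takes about 87% of the weight, but m8.2 keeps about 13%, and dse ≈ 3.3 is of the same order as d_loo ≈ 3.6, so the support for the interaction is real but modest.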
Code 8.16¶
az.waic(trace_8_3, pointwise=True)
/opt/hostedtoolcache/Python/3.7.12/x64/lib/python3.7/site-packages/arviz/stats/stats.py:1460: UserWarning: For one or more samples the posterior variance of the log predictive densities exceeds 0.4. This could be indication of WAIC starting to fail.
See http://arxiv.org/abs/1507.04544 for details
"For one or more samples the posterior variance of the log predictive "
Computed from 2000 by 170 log-likelihood matrix
Estimate SE
elpd_waic 129.67 7.35
p_waic 4.94 -
There has been a warning during the calculation. Please check the results.
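Note - the warning points at a few influential observations; inspecting the pointwise values can identify them (the book singles out countries such as the Seychelles).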
8.1.4 Plotting the interaction¶
Code 8.17¶
rugged_seq = np.linspace(start=-0.1, stop=1.1, num=30)
sample_alpha = posterior_8_3["alpha"][0]
sample_beta = posterior_8_3["beta"][0]
sample_sigma = posterior_8_3["sigma"][0]
# compute mu over samples, fixing cid=1
jdc_8_3_test_cid_1 = model_8_3(np.repeat(1, 30), rugged_std=rugged_seq)
ds, samples = jdc_8_3_test_cid_1.sample_distributions(
value=[sample_alpha, sample_beta, sample_sigma, None]
)
mu_NotAfrica = tf.squeeze(ds[-1].distribution.loc)
mu_NotAfrica_mu = tf.reduce_mean(mu_NotAfrica, 0)
mu_NotAfrica_ci = tfp.stats.percentile(mu_NotAfrica, q=(1.5, 98.5), axis=0)
# compute mu over samples, fixing cid=0
jdc_8_3_test_cid_0 = model_8_3(np.repeat(0, 30), rugged_std=rugged_seq)
ds, samples = jdc_8_3_test_cid_0.sample_distributions(
value=[sample_alpha, sample_beta, sample_sigma, None]
)
mu_Africa = tf.squeeze(ds[-1].distribution.loc)
mu_Africa_mu = tf.reduce_mean(mu_Africa, 0)
mu_Africa_ci = tfp.stats.percentile(mu_Africa, q=(1.5, 98.5), axis=0)
az.plot_pair(d_A1[["rugged_std", "log_gdp_std"]].to_dict(orient="list"))
plt.gca().set(
xlim=(-0.01, 1.01),
xlabel="ruggedness (standardized)",
ylabel="log GDP (as proportion of mean)",
)
plt.plot(rugged_seq, mu_Africa_mu, "k")
plt.fill_between(rugged_seq, mu_Africa_ci[0], mu_Africa_ci[1], color="b", alpha=0.2)
plt.title("African nations")
plt.show();
az.plot_pair(d_A0[["rugged_std", "log_gdp_std"]].to_dict(orient="list"))
plt.gca().set(
xlim=(-0.01, 1.01),
xlabel="ruggedness (standardized)",
ylim=(0.8, 1.3),
ylabel="log GDP (as proportion of mean)",
)
plt.plot(rugged_seq, mu_NotAfrica_mu, "k")
plt.fill_between(
rugged_seq, mu_NotAfrica_ci[0], mu_NotAfrica_ci[1], color="k", alpha=0.2
)
plt.title("Non-African nations")
plt.show()
8.2 Symmetry of interactions¶
Code 8.18¶
# TODO - draw corresponding graph (Figure 8.6)
rugged_seq = np.linspace(start=-0.2, stop=1.2, num=30)
delta = mu_Africa_mu - mu_NotAfrica_mu
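A minimal sketch for the TODO above (the book gives no code for Figure 8.6; this assumes model_8_3 and posterior_8_3 from Code 8.13): recompute the expected log GDP for both continents on the new rugged_seq and plot their difference, with a dashed line at zero.
sample_alpha = posterior_8_3["alpha"][0]
sample_beta = posterior_8_3["beta"][0]
sample_sigma = posterior_8_3["sigma"][0]

mus = []
for cid in (0, 1):  # 0 = Africa, 1 = not Africa
    jdc = model_8_3(np.repeat(cid, 30), rugged_std=rugged_seq)
    ds, _ = jdc.sample_distributions(
        value=[sample_alpha, sample_beta, sample_sigma, None]
    )
    mus.append(tf.squeeze(ds[-1].distribution.loc))

# expected difference (Africa minus not Africa) across the posterior samples
delta = mus[0] - mus[1]
delta_mu = tf.reduce_mean(delta, 0)
delta_ci = tfp.stats.percentile(delta, q=(1.5, 98.5), axis=0)

plt.plot(rugged_seq, delta_mu, "k")
plt.fill_between(rugged_seq, delta_ci[0], delta_ci[1], color="k", alpha=0.2)
plt.gca().axhline(0, ls="--")
plt.gca().set(xlabel="ruggedness", ylabel="expected difference log GDP")
plt.show()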
8.3 Continuous interactions¶
8.3.1 A winter flower¶
Code 8.19¶
d = RethinkingDataset.Tulips.get_dataset()
d.info()
d.head()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 27 entries, 0 to 26
Data columns (total 4 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 bed 27 non-null object
1 water 27 non-null int64
2 shade 27 non-null int64
3 blooms 27 non-null float64
dtypes: float64(1), int64(2), object(1)
memory usage: 992.0+ bytes
| | bed | water | shade | blooms |
|---|---|---|---|---|
| 0 | a | 1 | 1 | 0.00 |
| 1 | a | 1 | 2 | 0.00 |
| 2 | a | 1 | 3 | 111.04 |
| 3 | a | 2 | 1 | 183.47 |
| 4 | a | 2 | 2 | 59.16 |
Code 8.20¶
d["blooms_std"] = d.blooms / d.blooms.max()
d["water_cent"] = d.water - d.water.mean()
d["shade_cent"] = d.shade - d.shade.mean()
Code 8.21¶
a = tfd.Normal(loc=0.5, scale=1.0).sample((int(1e4),))
np.sum((a < 0) | (a > 1)) / a.shape[0]
0.6196
Code 8.22¶
a = tfd.Normal(loc=0.5, scale=0.25).sample((int(1e4),))
np.sum((a < 0) | (a > 1)) / a.shape[0]
0.0497
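Note - with scale 0.25 only about 5% of the prior intercepts fall outside the valid [0, 1] range of blooms_std, versus about 62% under the flatter Normal(0.5, 1) prior.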
8.3.2 The models¶
Code 8.23¶
tdf = dataframe_to_tensors("Tulip", d, ["blooms_std", "water_cent", "shade_cent"])
def model_8_4(water_cent, shade_cent):
    def _generator():
        alpha = yield Root(
            tfd.Sample(tfd.Normal(loc=0.5, scale=0.25, name="alpha"), sample_shape=1)
        )
        betaW = yield Root(
            tfd.Sample(tfd.Normal(loc=0.0, scale=0.25, name="betaW"), sample_shape=1)
        )
        betaS = yield Root(
            tfd.Sample(tfd.Normal(loc=0.0, scale=0.25, name="betaS"), sample_shape=1)
        )
        sigma = yield Root(
            tfd.Sample(tfd.Exponential(rate=1.0, name="sigma"), sample_shape=1)
        )
        mu = (
            alpha[..., tf.newaxis]
            + betaW[..., tf.newaxis] * water_cent
            + betaS[..., tf.newaxis] * shade_cent
        )
        scale = sigma[..., tf.newaxis]
        blooms_std = yield tfd.Independent(
            tfd.Normal(loc=mu, scale=scale), reinterpreted_batch_ndims=1
        )

    return tfd.JointDistributionCoroutine(_generator, validate_args=False)
jdc_8_4 = model_8_4(shade_cent=tdf.shade_cent, water_cent=tdf.water_cent)
init_state = [
0.5 * tf.ones([NUMBER_OF_CHAINS]),
tf.zeros([NUMBER_OF_CHAINS]),
tf.zeros([NUMBER_OF_CHAINS]),
tf.ones([NUMBER_OF_CHAINS]),
]
bijectors = [
tfb.Identity(),
tfb.Identity(),
tfb.Identity(),
tfb.Exp(),
]
posterior_8_4, trace_8_4 = sample_posterior(
jdc_8_4,
observed_data=(tdf.blooms_std,),
params=["alpha", "betaW", "betaS", "sigma"],
num_samples=1000,
init_state=init_state,
bijectors=bijectors,
)
Code 8.24¶
def model_8_5(water_cent, shade_cent):
    def _generator():
        alpha = yield Root(
            tfd.Sample(tfd.Normal(loc=0.5, scale=0.25, name="alpha"), sample_shape=1)
        )
        betaW = yield Root(
            tfd.Sample(tfd.Normal(loc=0.0, scale=0.25, name="betaW"), sample_shape=1)
        )
        betaS = yield Root(
            tfd.Sample(tfd.Normal(loc=0.0, scale=0.25, name="betaS"), sample_shape=1)
        )
        betaWS = yield Root(
            tfd.Sample(tfd.Normal(loc=0.0, scale=0.25, name="betaWS"), sample_shape=1)
        )
        sigma = yield Root(
            tfd.Sample(tfd.Exponential(rate=1.0, name="sigma"), sample_shape=1)
        )
        mu = (
            alpha[..., tf.newaxis]
            + betaW[..., tf.newaxis] * water_cent
            + betaS[..., tf.newaxis] * shade_cent
            + betaWS[..., tf.newaxis] * shade_cent * water_cent
        )
        scale = sigma[..., tf.newaxis]
        blooms_std = yield tfd.Independent(
            tfd.Normal(loc=mu, scale=scale), reinterpreted_batch_ndims=1
        )

    return tfd.JointDistributionCoroutine(_generator, validate_args=False)
jdc_8_5 = model_8_5(shade_cent=tdf.shade_cent, water_cent=tdf.water_cent)
init_state = [
0.5 * tf.ones([NUMBER_OF_CHAINS]),
tf.zeros([NUMBER_OF_CHAINS]),
tf.zeros([NUMBER_OF_CHAINS]),
tf.zeros([NUMBER_OF_CHAINS]),
tf.ones([NUMBER_OF_CHAINS]),
]
bijectors = [
tfb.Identity(),
tfb.Identity(),
tfb.Identity(),
tfb.Identity(),
tfb.Exp(),
]
posterior_8_5, trace_8_5 = sample_posterior(
jdc_8_5,
observed_data=(tdf.blooms_std,),
params=["alpha", "betaW", "betaS", "betaWS", "sigma"],
num_samples=1000,
init_state=init_state,
bijectors=bijectors,
)
8.3.3 Plotting posterior predictions¶
Code 8.25¶
sample_alpha = posterior_8_4["alpha"][0]
sample_betaW = posterior_8_4["betaW"][0]
sample_betaS = posterior_8_4["betaS"][0]
sample_sigma = posterior_8_4["sigma"][0]
_, axes = plt.subplots(1, 3, figsize=(9, 3), sharey=True) # 3 plots in 1 row
for ax, s in zip(axes, range(-1, 2)):
    idx = d.shade_cent == s
    ax.scatter(d.water_cent[idx], d.blooms_std[idx])
    ax.set(xlim=(-1.1, 1.1), ylim=(-0.1, 1.1), xlabel="water", ylabel="blooms")
    shade_cent = s
    water_cent = np.arange(-1, 2)
    jdc_8_4_test = model_8_4(
        water_cent=tf.cast(water_cent, dtype=tf.float32),
        shade_cent=tf.cast(shade_cent, dtype=tf.float32),
    )
    ds, _ = jdc_8_4_test.sample_distributions(
        value=[sample_alpha, sample_betaW, sample_betaS, sample_sigma, None]
    )
    mu = tf.squeeze(ds[-1].distribution.loc)
    for i in range(20):
        ax.plot(range(-1, 2), mu[i], "k", alpha=0.3)
# Code for plotting model 8.5 is not listed in the book, but there are corresponding figures, so draw them here
sample_alpha = posterior_8_5["alpha"][0]
sample_betaW = posterior_8_5["betaW"][0]
sample_betaS = posterior_8_5["betaS"][0]
sample_betaWS = posterior_8_5["betaWS"][0]
sample_sigma = posterior_8_5["sigma"][0]
_, axes = plt.subplots(1, 3, figsize=(9, 3), sharey=True) # 3 plots in 1 row
for ax, s in zip(axes, range(-1, 2)):
    idx = d.shade_cent == s
    ax.scatter(d.water_cent[idx], d.blooms_std[idx])
    ax.set(xlim=(-1.1, 1.1), ylim=(-0.1, 1.1), xlabel="water", ylabel="blooms")
    shade_cent = s
    water_cent = np.arange(-1, 2)
    jdc_8_5_test = model_8_5(
        water_cent=tf.cast(water_cent, dtype=tf.float32),
        shade_cent=tf.cast(shade_cent, dtype=tf.float32),
    )
    ds, _ = jdc_8_5_test.sample_distributions(
        value=[
            sample_alpha,
            sample_betaW,
            sample_betaS,
            sample_betaWS,
            sample_sigma,
            None,
        ]
    )
    mu = tf.squeeze(ds[-1].distribution.loc)
    for i in range(20):
        ax.plot(range(-1, 2), mu[i], "k", alpha=0.3)