Contents

Chapter 11: Bayesian statistics

Contents

Chapter 11: Bayesian statistics#

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import norm, t, binom, scoreatpercentile
import pingouin as pg
import matplotlib

import rpy2.robjects as ro
from rpy2.robjects.packages import importr
from rpy2.robjects import pandas2ri
pandas2ri.activate()
from rpy2.robjects.conversion import localconverter
# import NHANES package
base = importr('NHANES')
%load_ext rpy2.ipython

with localconverter(ro.default_converter + pandas2ri.converter):
  NHANES = ro.conversion.rpy2py(ro.r['NHANES'])

 
NHANES = NHANES.drop_duplicates(subset='ID')
NHANES_adult = NHANES.dropna(subset=['Weight']).query('Age > 17 and BPSysAve > 0')

rng = np.random.default_rng(123456)

Figure 11.2#

nResponders = 64
nTested = 100

drugDf = pd.DataFrame({'outcome': ["improved", "not improved"],
                       'number': [nResponders, nTested - nResponders]})

likeDf = pd.DataFrame({'resp': np.arange(1, 100)})
likeDf['presp'] = likeDf.resp / 100
for p in [.3, .5, .7]:
    label = f'likelihood{(p * 10):.0f}'
    likeDf[label] = binom.pmf(likeDf.resp, 100, p)
    sns.lineplot(data=likeDf, x='resp', y=label)
plt.legend(['p=0.3', 'p=0.5', 'p=0.7'])
plt.plot([drugDf.number[0], drugDf.number[0]], [0, .1])
plt.ylabel('likelihood')
plt.xlabel('number of responders')

Text(0.5, 0, 'number of responders')

_images/11-BayesianStatistics_3_1.png

Figure 11.3#

likeDf['uniform_prior'] = 1 / likeDf.shape[0]

marginal_likelihood = np.sum(binom.pmf(nResponders, nTested, likeDf.presp) * likeDf.uniform_prior)

bayesDf = pd.DataFrame({'steps': np.arange(.01, 1.0, .01)})
bayesDf['likelihoods'] = binom.pmf(nResponders, nTested, bayesDf.steps)
bayesDf['priors'] = 1 / bayesDf.shape[0]
bayesDf['posteriors'] = (bayesDf.likelihoods * bayesDf.priors) / marginal_likelihood
maxidx = bayesDf.posteriors.argmax()
MAP = bayesDf.steps[maxidx]

sns.lineplot(data=bayesDf, x='steps', y='posteriors')
plt.scatter([MAP], bayesDf.posteriors.max(), color='black')
sns.lineplot(data=bayesDf, x='steps', y='priors', color='black', linewidth=.5, linestyle='dashed')
plt.xlabel('p(respond)')
plt.ylabel('posterior probability of the observed data')

Text(0, 0.5, 'posterior probability of the observed data')

_images/11-BayesianStatistics_5_1.png

Figure 11.4#

fig, ax = plt.subplots(2, 2, figsize=(12,8))


bayesDf = pd.DataFrame({'steps': np.arange(.01, 1.0, .01)})
bayesDf['likelihoods'] = binom.pmf(nResponders, nTested, bayesDf.steps)
bayesDf['priors_flat'] = 1 / bayesDf.shape[0]
bayesDf['priors_absolute'] = 0
bayesDf.loc[bayesDf.steps > 0.8, 'priors_absolute'] = 1
bayesDf.priors_absolute = bayesDf.priors_absolute / bayesDf.priors_absolute.sum()
bayesDf['priors_empirical_weak'] = binom.pmf(10, 20, bayesDf.steps) / np.sum(binom.pmf(10, 20, bayesDf.steps))
bayesDf['priors_empirical_strong'] = binom.pmf(250, 500, bayesDf.steps) / np.sum(binom.pmf(250, 500, bayesDf.steps))

marginal_likelihood_flat = np.sum(binom.pmf(nResponders, nTested, bayesDf.steps) * bayesDf.priors_flat)
marginal_likelihood_weak = np.sum(binom.pmf(nResponders, nTested, bayesDf.steps) * bayesDf.priors_empirical_weak)
marginal_likelihood_strong = np.sum(binom.pmf(nResponders, nTested, bayesDf.steps) * bayesDf.priors_empirical_strong)
marginal_likelihood_absolute = np.sum(binom.pmf(nResponders, nTested, bayesDf.steps) * bayesDf.priors_absolute)


bayesDf['posteriors_flat'] = (bayesDf.likelihoods * bayesDf.priors_flat) / marginal_likelihood_flat
bayesDf['posteriors_weak'] = (bayesDf.likelihoods * bayesDf.priors_empirical_weak) / marginal_likelihood_weak
bayesDf['posteriors_strong'] = (bayesDf.likelihoods * bayesDf.priors_empirical_strong) / marginal_likelihood_strong
bayesDf['posteriors_absolute'] = (bayesDf.likelihoods * bayesDf.priors_absolute) / marginal_likelihood_absolute

sns.lineplot(data=bayesDf, x='steps', y='posteriors_flat', ax=ax[0][0], color='blue')
sns.lineplot(data=bayesDf, x='steps', y='posteriors_weak', ax=ax[0][0], color='red')
sns.lineplot(data=bayesDf, x='steps', y='priors_empirical_weak', ax=ax[0][0], linestyle='dotted')

sns.lineplot(data=bayesDf, x='steps', y='posteriors_weak', ax=ax[0][1], color='blue')
sns.lineplot(data=bayesDf, x='steps', y='posteriors_strong', ax=ax[0][1], color='red')
sns.lineplot(data=bayesDf, x='steps', y='priors_empirical_strong', ax=ax[0][1], linestyle='dotted')

sns.lineplot(data=bayesDf, x='steps', y='posteriors_absolute', ax=ax[1][0], color='blue')
bayesDf.loc[bayesDf.steps > 0.8, 'priors_absolute'] = 0.7  # rescale for figure
sns.lineplot(data=bayesDf, x='steps', y='priors_absolute', ax=ax[1][0], linestyle='dotted')


for i in range(2):
    for j in range(2):
        ax[i][j].set_xlabel("p(heads)")
        ax[i][j].set_ylabel("Posterior probability")

ax[1][1].set_visible(False)

_images/11-BayesianStatistics_7_0.png

Figure 11.5#

nsubs = 40
effect_size = 0.2
rng = np.random.default_rng(12345)

drugDf = pd.DataFrame({'group': ['Drug' if rng.uniform() > .5 else 'Placebo' for i in range(nsubs)]})
drugDf['hbchange'] =  rng.uniform(size=nsubs) - (drugDf.group == 'Drug') * effect_size

sns.boxplot(data=drugDf, x='group', y='hbchange')
plt.ylabel('Change in hemoglobin A1C')

Text(0, 0.5, 'Change in hemoglobin A1C')

_images/11-BayesianStatistics_9_1.png

T-test for drug example#

tt = pg.ttest(x=drugDf.query('group == "Placebo"').hbchange, 
              y=drugDf.query('group == "Drug"').hbchange,
              correction=True, alternative='greater')
tt

	T	dof	alternative	p-val	CI95%	cohen-d	BF10	power
T-test	2.60501	31.58191	greater	0.006945	[0.07, inf]	0.823776	8.104	0.819463

Bayes factor for drug example#

BF10 is generated by pg.ttest in cell above, but its method for one vs. two-tailed tests differs from that used in the BayesFactor package in R.

%%R -i drugDf

library(BayesFactor)
bf_drug <- ttestBF(
  formula = hbchange ~ group, data = drugDf,
  nullInterval = c(0, Inf)
)
bf_drug

R[write to console]: Loading required package: coda

R[write to console]: Loading required package: Matrix

R[write to console]: ************
Welcome to BayesFactor 0.9.12-4.4. If you have questions, please contact Richard Morey (richarddmorey@gmail.com).

Type BFManual() to open the manual.
************

Bayes factor analysis
--------------

[1] Alt., r=0.707 0<d<Inf

0.1007666

±

[2] Alt., r=0.707 !(0<d<Inf)

8.00271

±

Against denominator:

Null, mu1-mu2 = 0

---
Bayes factor type: 

BFindepSample

JZS

Bayes factor for one-sided tests#

%%R 

bf_drug[1]/bf_drug[2]

Bayes factor analysis
--------------

[1] Alt., r=0.707 0<d<Inf

0.01259155

±

Against denominator:

Alternative, r = 0.707106781186548, mu =/= 0 !(0<d<Inf)

---
Bayes factor type: 

BFindepSample

JZS

Table 11.1#

# Compute credible intervals for example

nsamples = 100000

# create random uniform variates for x and y
x = rng.uniform(size=nsamples)
y = rng.uniform(size=nsamples)

# create f(x)
fx = binom.pmf(nResponders,nTested, x)

# accept samples where y < f(x)
accepted_samples = x[y < fx]

credible_interval = scoreatpercentile(accepted_samples, (2.5, 97.5))

credible_interval

array([0.54525307, 0.72923171])

Figure 11.6#

sns.kdeplot(accepted_samples, color='red')
for c in credible_interval:
    plt.plot([c, c], [0, 2], color='blue')

_images/11-BayesianStatistics_19_0.png