aloctavodia / Statistical-Rethinking-with-Python-and-PyMC3

Python/PyMC3 port of the examples in " Statistical Rethinking A Bayesian Course with Examples in R and Stan" by Richard McElreath

Geek Repo:Geek Repo

Github PK Tool:Github PK Tool

Proposal for code 6.12

rosgori opened this issue · comments

This is the proposal for code 6.12 in Python

import numpy as np
import scipy.stats as stats
import pymc3 as pm
import theano

def sim_train_test(N=20, k=3, rho=[0.15, -0.4], b_sigma=100):
    
    n_dim = 1 + len(rho)
    if n_dim < k:
        n_dim = k
    Rho = np.diag(np.ones(n_dim))
    Rho[0, 1:3:1] = rho
    i_lower = np.tril_indices(n_dim, -1)
    Rho[i_lower] = Rho.T[i_lower]
    
    x_train = stats.multivariate_normal.rvs(cov=Rho, size=N)
    x_test = stats.multivariate_normal.rvs(cov=Rho, size=N)
    
    mm_train = np.ones((N,1))
    
    np.concatenate([mm_train, x_train[:, 1:k]], axis=1)
    
    #Hacer modelo con pymc3
    
    with pm.Model() as m_sim:
        vec_V = pm.MvNormal('vec_V', mu=0, tau=b_sigma * np.eye(n_dim), 
                            shape=(1, n_dim), testval=np.random.randn(1, n_dim)*.01)
        mu = pm.Deterministic('mu', 0 + theano.tensor.dot(x_train, vec_V.T))
        y = pm.Normal('y', mu=mu, sd=1, observed=x_train[:, 0])
        trace_m_sim = pm.sample(1000, tune=1000)
        
        
    vec = pm.summary(trace_m_sim)['mean'][:n_dim]
    vec = np.array([i for i in vec]).reshape(n_dim, -1)
    
    dev_train = - 2 * sum(stats.norm.logpdf(x_train, loc = np.matmul(x_train, vec), scale = 1))    
    
    mm_test = np.ones((N,1))
    
    mm_test = np.concatenate([mm_test, x_test[:, 1:k +1]], axis=1)
    
    dev_test = - 2 * sum(stats.norm.logpdf(x_test[:,0], loc = np.matmul(mm_test, vec), scale = 1))    
    
#    print('Deviation for training', '\t', 'Deviation for test')
#    print(np.mean(dev_train), '\t', np.mean(dev_test))
    
    return np.mean(dev_train), np.mean(dev_test)
    
    
n = 20
tries = 50
param = 5
r = np.zeros(shape=(param - 1, 4))

train = []
test = []

for j in range(2, param + 1):
    print(j)
    for i in range(1, tries):
        tr, te = sim_train_test(N=n, k=param)
        train.append(tr), test.append(te)
    r[j -2, :] = np.mean(train), np.std(train, ddof=1), np.mean(test), np.std(test, ddof=1)

Unfortunately, it is slower than R version (it's very, very slow). I know this code doesn't have comments, and to undestand it you have to look this one. I will try to opmitize it (although I don't know how).

If anyone has any ideas, I'd like to hear them.

Repeatedly defining the same model is slow due to theano compilation. My suggestion is to try use theano.shared to update the value instead.