Proposal for code 6.12

Question

Proposal for code 6.12

rosgori opened this issue 6 years ago · comments

This is the proposal for code 6.12 in Python

import numpy as np
import scipy.stats as stats
import pymc3 as pm
import theano

def sim_train_test(N=20, k=3, rho=(0.15, -0.4), b_sigma=100):
    """Simulate a train and a test sample, fit a linear model, return deviances.

    Both samples are drawn from a zero-mean multivariate normal whose first
    column is the outcome and whose remaining columns are candidate
    predictors.  A Gaussian linear regression with ``k`` coefficients (an
    intercept plus the first ``k - 1`` predictors) is fitted to the training
    sample with PyMC3, and the posterior-mean coefficients are used to score
    both samples.

    Parameters
    ----------
    N : int
        Number of observations in each of the two samples.
    k : int
        Number of regression coefficients, intercept included.
    rho : sequence of float
        Correlations between the outcome and the first ``len(rho)``
        predictors.
    b_sigma : float
        Standard deviation of the zero-mean normal prior on every
        coefficient.

    Returns
    -------
    tuple
        ``(dev_train, dev_test)``: minus twice the Gaussian log-likelihood
        (unit standard deviation) of the training and test outcomes.
    """
    rho = np.asarray(rho, dtype=float)
    n_dim = max(1 + rho.size, k)

    # Identity correlation matrix, except that the outcome (row/column 0) is
    # correlated with the first len(rho) predictors.
    Rho = np.eye(n_dim)
    Rho[0, 1:1 + rho.size] = rho
    i_lower = np.tril_indices(n_dim, -1)
    Rho[i_lower] = Rho.T[i_lower]  # mirror the upper triangle -> symmetric

    # rvs() squeezes singleton axes, so force the (N, n_dim) layout.
    x_train = np.reshape(
        stats.multivariate_normal.rvs(cov=Rho, size=N), (N, n_dim))
    x_test = np.reshape(
        stats.multivariate_normal.rvs(cov=Rho, size=N), (N, n_dim))

    y_train = x_train[:, 0]
    y_test = x_test[:, 0]

    # Design matrices: a column of ones (intercept) followed by the first
    # k - 1 predictors.  Column 0 is the outcome and must never be included.
    mm_train = np.concatenate([np.ones((N, 1)), x_train[:, 1:k]], axis=1)
    mm_test = np.concatenate([np.ones((N, 1)), x_test[:, 1:k]], axis=1)

    # Build the model with pymc3.
    with pm.Model():
        # Independent normal priors; b_sigma is a standard deviation.
        vec_V = pm.Normal('vec_V', mu=0, sd=b_sigma, shape=k)
        # 1-D linear predictor so it lines up element-wise with y_train.
        mu = theano.tensor.dot(mm_train, vec_V)
        pm.Normal('y', mu=mu, sd=1, observed=y_train)
        trace_m_sim = pm.sample(1000, tune=1000)

    # Posterior mean of each coefficient, looked up by variable name.
    vec = trace_m_sim['vec_V'].mean(axis=0)

    dev_train = -2 * np.sum(
        stats.norm.logpdf(y_train, loc=np.matmul(mm_train, vec), scale=1))
    dev_test = -2 * np.sum(
        stats.norm.logpdf(y_test, loc=np.matmul(mm_test, vec), scale=1))

    return dev_train, dev_test
    
    
# Simulation settings: sample size, simulations per model, and the largest
# number of parameters to try.
n = 20
tries = 50
param = 5

# One row per parameter count (2..param); the columns hold
# mean(train), sd(train), mean(test), sd(test) of the deviances.
r = np.zeros(shape=(param - 1, 4))

train = []
test = []

for j in range(2, param + 1):
    print(j)
    # Start from empty lists so that each row summarises only the models
    # with j parameters.
    train = []
    test = []
    for _ in range(tries):
        tr, te = sim_train_test(N=n, k=j)
        train.append(tr)
        test.append(te)
    r[j - 2, :] = (np.mean(train), np.std(train, ddof=1),
                   np.mean(test), np.std(test, ddof=1))

Unfortunately, it is slower than the R version (it's very, very slow). I know this code doesn't have comments, and to understand it you have to look at this one. I will try to optimize it (although I don't know how).

If anyone has any ideas, I'd like to hear them.

Junpeng Lao · Answer 1 · Thu Apr 19 2018 14:23:49 GMT+0800 (China Standard Time)

Repeatedly defining the same model is slow due to theano compilation. My suggestion is to try using theano.shared to update the values instead.