Graph with Iteration-loop

Question

Graph with Iteration-loop

fionser opened this issue 4 years ago · comments

Hi,

I am implementing logistic regression (LR) model training using EVA.
I have a question about updating an encrypted LR model.

Basically, we need to update the LR model as `[w] <- [w] + gamma * [g]` in a for-loop.
But I encountered a "Bad variant access" error.

Olli Saarikivi · Answer 1 · Sat Jan 09 2021 03:24:51 GMT+0800 (China Standard Time)

Hi! Thank you for filing an issue. A "Bad variant access" is most likely a bug in EVA. Would you be able to share a Python script with an EVA program that triggers this bug?

Having loops in PyEVA programs should not be a problem. The first compilation step from PyEVA into EVA IR uses a tracing approach that will unroll any loops into a DAG of instructions.

lwj · Answer 2 · Mon Jan 11 2021 09:12:18 GMT+0800 (China Standard Time)

from eva import *
from eva.ckks import CKKSCompiler
from eva.seal import generate_keys
from eva.metric import valuation_mse
import numpy as np

class Encoder:
    """Pack NumPy matrices and vectors into flat arrays of ``nslots`` entries.

    Matrices are flattened row by row.  Vectors are replicated so that they
    line up with such a flattened matrix: a column vector is tiled
    (``v0 v1 ... v0 v1 ...``) and a row vector has each entry repeated
    (``v0 v0 ... v1 v1 ...``).
    """

    def __init__(self, nslots):
        # Total number of slots in every encoded array.
        self.nslots = nslots

    def encode_mat(self, A):
        """Flatten the 2-D matrix ``A`` row by row.

        Raises:
            AssertionError: if ``A`` does not have exactly ``nslots`` entries.
        """
        nr, nc = A.shape
        assert nr * nc == self.nslots
        return A.reshape(nr * nc)

    def encode_vec(self, vec):
        """Encode a vector, choosing the layout from its shape.

        1-D input and ``(n, 1)`` input use the column layout; ``(1, n)``
        input uses the row layout.

        Raises:
            AssertionError: if a 2-D input has neither dimension equal to 1.
        """
        if len(vec.shape) == 1:
            return self._encode_col_vec(vec)
        n, m = vec.shape
        assert n == 1 or m == 1
        if n == 1:
            return self._encode_row_vec(vec)
        return self._encode_col_vec(vec)

    def _encode_row_vec(self, vec):
        """Repeat each entry of the ``(1, n)`` vector ``nslots // n`` times."""
        n = vec.shape[1]
        nrep = self.nslots // n
        ret = np.zeros(self.nslots)
        # Slots past n * nrep (when nslots is not a multiple of n) stay zero.
        ret[:n * nrep] = np.repeat(vec[0], nrep)
        return ret

    def _encode_col_vec(self, vec):
        """Tile the length-``n`` vector ``nslots // n`` times."""
        n = vec.shape[0]
        nrep = self.nslots // n
        ret = np.zeros(self.nslots)
        # Slots past n * nrep (when nslots is not a multiple of n) stay zero.
        ret[:n * nrep] = np.tile(np.reshape(vec, -1), nrep)
        return ret

def define_LR(batch_sze, n_features, eta=0.1, gamma=0.9, n_iter=1):
    """Build the EVA program 'LR' applying ``n_iter`` unrolled update steps.

    Each step computes, on packed vectors of ``batch_sze * n_features`` slots:

        c = gamma * Z.T * sigmoid(-Z * v)
        w_out = v + c
        v_out = (1 - eta) * w_out + eta * w

    where sigmoid(-x) is replaced by the cubic ``c0 + c1*x + c2*x^3``.

    Args:
        batch_sze: number of rows of the packed data matrix ``Z``.
        n_features: number of columns of ``Z``.
        eta: mixing weight between the new ``w_out`` and the previous ``w``.
        gamma: step size multiplying the gradient term ``c``.
        n_iter: number of update steps traced into the program.

    Returns:
        The EvaProgram, with inputs 'Z', 'v', 'w', 'mask' and outputs
        'v_out', 'w_out'; input scales and output ranges are both set to 40.
    """
    def NAG_update(Z, v, w, mask):
        """Trace one update step and return ``(v_nxt, w_nxt)``."""
        ## sigmoid(-x) ~ c0 + c1*x + c2*x^3
        sigmoid_coeff = [0.5, -0.15012, 0.001593]
        c0, c1, c2 = sigmoid_coeff
        # The cubic is evaluated as c0 + (c2*ll) * (ll^2 + c1/c2).  The
        # (1 - eta) factor needed by v_nxt is folded into a second copy of
        # the constants (R, T) rather than applied to the result afterwards.
        R = ((1 - eta) * gamma * c2) * Z
        ll = SumCols(Z * v, mask, batch_sze, n_features)
        N = (gamma * c2) * Z

        S = R * ll
        # The linear coefficient c1 belongs here (not c0).
        Mt = ll * ll + (c1 / c2)
        O = N * ll

        T = ((1 - eta) * gamma * c0) * Z
        P = (gamma * c0) * Z
        # U = (1 - eta) * gamma * Z * (c0 + c1*ll + c2*ll^3)
        U = Mt * S + T

        # v_out = (1 - eta) * w_out + eta * w, so w is weighted by eta.
        Y = eta * w
        X = (1. - eta) * v
        # Q = gamma * Z * (c0 + c1*ll + c2*ll^3)
        Q = Mt * O + P
        v_nxt = X + Y + SumRows(U, batch_sze, n_features)
        w_nxt = v + SumRows(Q, batch_sze, n_features)
        return (v_nxt, w_nxt)

    prog = EvaProgram('LR', vec_size=batch_sze*n_features)
    with prog:
        Z = Input('Z', True)
        v = Input('v', True)
        w = Input('w', True)
        # mask is a plaintext input here; the reported error goes away when
        # mask is passed as an encrypted input instead.
        mask = Input('mask', False)
        # The Python loop is executed while tracing, so the program contains
        # n_iter copies of the update step.
        for ii in range(n_iter):
            v, w = NAG_update(Z, v, w, mask)
        Output('v_out', v)
        Output('w_out', w)

    prog.set_input_scales(40)
    prog.set_output_ranges(40)
    return prog

# Problem size: 32 slots arranged as a (batch_sze x n_features) = (4 x 8) matrix.
nslots = 32
n_features = 8
batch_sze = nslots // n_features
eta = 0.1
gamma = 0.1 / batch_sze

# Random test data, uniform in [-0.05, 0.05).
_Z = (np.random.rand(batch_sze, n_features) - 0.5) / 10.
_w = (np.random.rand(n_features, 1) - 0.5) / 10.
_v = (np.random.rand(n_features, 1) - 0.5) / 10.

n_iter = 2
# NOTE(review): no module-level NAG_update taking (Z, v, w, eta, gamma, n_iter)
# is defined in this snippet (only the 4-argument one nested in define_LR), so
# this call cannot run as posted.
w_out, v_out = NAG_update(Z=_Z, v=_v, w=_w, eta=eta, gamma=gamma, n_iter=n_iter)
encoder = Encoder(nslots)
# Mask with ones in the first column of every row and zeros elsewhere.
mask = np.zeros((batch_sze, n_features))
mask[:, 0] = 1.
# Pack the matrix, the two column vectors and the mask into flat slot arrays.
Z = encoder.encode_mat(_Z)
w = encoder.encode_vec(_w)
v = encoder.encode_vec(_v)
mask = encoder.encode_mat(mask)

# Trace the program and compile it for CKKS.
lr_program = define_LR(batch_sze, n_features, eta=eta, gamma=gamma, n_iter=n_iter)
compiler = CKKSCompiler({'security_level':'0', 'warn_vec_size':'false'})
compiled, params, signature = compiler.compile(lr_program)

# Generate keys, encrypt the inputs, run the compiled program, decrypt.
inputs = {'Z':Z, 'w':w, 'v':v, 'mask':mask}
public_ctx, secret_ctx = generate_keys(params)
enc_inputs = public_ctx.encrypt(inputs, signature)
enc_outputs = public_ctx.execute(compiled, enc_inputs)
outputs = secret_ctx.decrypt(enc_outputs, signature)

lwj · Answer 3 · Mon Jan 11 2021 09:13:08 GMT+0800 (China Standard Time)

I found that when all the inputs are encrypted, the program seems to work.
However, when I switch some of the inputs to plaintext, the "Bad variant access" error is raised.

Olli Saarikivi · Answer 4 · Thu Jan 14 2021 03:15:18 GMT+0800 (China Standard Time)

Great, thank you. The current iteration of mixed ciphertext/plaintext inputs, where plaintext encoding is delayed to program execution time, is a newer feature. I will look into this.

Kim Laine · Answer 5 · Sat Mar 13 2021 05:42:06 GMT+0800 (China Standard Time)

Could you share the full source code? We are missing for example the SumCols and SumRows functions. Also it seems there is some issue with how NAG_update is called in the code: it is called with 7 parameters but the function only takes 4.

lwj · Answer 6 · Mon Mar 15 2021 09:07:45 GMT+0800 (China Standard Time)

from eva import *
from eva.ckks import CKKSCompiler
from eva.seal import generate_keys
from eva.metric import valuation_mse
import numpy as np

class Encoder:
    """Pack NumPy matrices and vectors into flat arrays of ``nslots`` entries.

    Matrices are flattened row by row.  Vectors are replicated so that they
    line up with such a flattened matrix: a column vector is tiled
    (``v0 v1 ... v0 v1 ...``) and a row vector has each entry repeated
    (``v0 v0 ... v1 v1 ...``).  The class also offers plaintext reference
    versions of the packed matrix-vector products and small print helpers.
    """

    def __init__(self, nslots):
        # Total number of slots in every encoded array.
        self.nslots = nslots

    def encode_mat(self, A):
        """Flatten the 2-D matrix ``A`` row by row.

        Raises:
            AssertionError: if ``A`` does not have exactly ``nslots`` entries.
        """
        nr, nc = A.shape
        assert nr * nc == self.nslots
        return A.reshape(nr * nc)

    def encode_vec(self, vec):
        """Encode a vector, choosing the layout from its shape.

        1-D input and ``(n, 1)`` input use the column layout; ``(1, n)``
        input uses the row layout.

        Raises:
            AssertionError: if a 2-D input has neither dimension equal to 1.
        """
        if len(vec.shape) == 1:
            return self._encode_col_vec(vec)
        n, m = vec.shape
        assert n == 1 or m == 1
        if n == 1:
            return self._encode_row_vec(vec)
        return self._encode_col_vec(vec)

    def _encode_row_vec(self, vec):
        """Repeat each entry of the ``(1, n)`` vector ``nslots // n`` times."""
        n = vec.shape[1]
        nrep = self.nslots // n
        ret = np.zeros(self.nslots)
        # Slots past n * nrep (when nslots is not a multiple of n) stay zero.
        ret[:n * nrep] = np.repeat(vec[0], nrep)
        return ret

    def _encode_col_vec(self, vec):
        """Tile the length-``n`` vector ``nslots // n`` times."""
        n = vec.shape[0]
        nrep = self.nslots // n
        ret = np.zeros(self.nslots)
        # Slots past n * nrep (when nslots is not a multiple of n) stay zero.
        ret[:n * nrep] = np.tile(np.reshape(vec, -1), nrep)
        return ret

    def show_mat(self, A, nrows=0):
        """Print the flat array ``A`` reshaped to ``nrows`` rows.

        Raises:
            AssertionError: if ``nrows`` is not positive.
        """
        assert nrows > 0
        ncols = A.shape[0] // nrows
        print(A.reshape(nrows, ncols))

    def show_col_vec(self, vec, sze):
        """Print the first ``sze`` slots of ``vec``."""
        print(vec[:sze])

    def show_row_vec(self, vec, sze):
        """Print the first slot of each of the ``sze`` equal chunks of ``vec``."""
        print(vec.reshape(sze, -1)[:, 0])

    def MTx(self, M, x, nrows=0):
        """M, x -> M.T * x

        For a 2-D ``M`` this is the plain NumPy product.  For a flat
        (packed) ``M`` the element-wise product with ``x`` is summed with
        ``int(log2(nrows))`` rotate-and-add steps whose left rotations are
        ``ncols``, ``2 * ncols``, ``4 * ncols``, ...

        Raises:
            AssertionError: in the packed case, if ``M`` and ``x`` differ in
                length or ``nrows`` is not positive.
        """
        if len(M.shape) > 1:
            return M.T.dot(x)
        assert M.shape[0] == x.shape[0]
        assert nrows > 0
        ncols = M.shape[0] // nrows
        prod = M * x
        logm = int(np.log2(nrows))
        for i in range(logm):
            prod = prod + np.roll(prod, -ncols * (1 << i))
        return prod

    def xTMT(self, M, x, nrows=0):
        """x, M -> x.T * M.T

        For a 2-D ``M`` this is the plain NumPy product.  For a flat
        (packed) ``M`` the element-wise product with ``x`` is summed within
        each row by left rotations of 1, 2, 4, ...; the result is masked to
        the first column and then copied back across the row by right
        rotations of 1, 2, 4, ...

        Raises:
            AssertionError: in the packed case, if ``M`` and ``x`` differ in
                length, ``nrows`` is not positive, or ``nrows * ncols`` does
                not equal ``nslots`` (checked by ``encode_mat``).
        """
        if len(M.shape) > 1:
            return M.dot(x).T
        assert M.shape[0] == x.shape[0]
        assert nrows > 0
        ncols = M.shape[0] // nrows
        prod = M * x
        logn = int(np.log2(ncols))
        for i in range(logn):
            prod = prod + np.roll(prod, -(1 << i))

        # Keep only the first column, which now holds each row's sum.
        D = np.zeros((nrows, ncols))
        D[:, 0] = 1.
        prod *= self.encode_mat(D)
        for i in range(logn):
            prod = prod + np.roll(prod, 1 << i)
        return prod


def SumRows(A, nrows, ncols):
    """Accumulate ``A`` with shifted copies of itself, one per doubling step.

    Performs ``int(log2(nrows))`` steps; step ``k`` adds ``A << (ncols * 2**k)``
    to the running value.  ``A`` may be any object supporting ``<<`` and ``+``
    (for EVA expressions ``<<`` is presumably a slot rotation, so this sums
    the rows of a row-major packed matrix; confirm against the EVA docs).
    """
    steps = int(np.log2(nrows))
    total = A
    for level in range(steps):
        total = total + (total << (ncols * 2 ** level))
    return total

def SumCols(A, masking, nrows, ncols):
    """Sum within each row, keep the masked slots, then spread them back.

    Three phases, each using ``int(log2(ncols))`` doubling steps:

    1. add ``A << 1``, ``A << 2``, ``A << 4``, ... to accumulate each row;
    2. multiply by ``masking`` to keep only the wanted slots;
    3. add ``A >> 1``, ``A >> 2``, ``A >> 4``, ... to copy the kept value
       across the row.

    Phase 3 must shift by powers of two, mirroring phase 1.  Shifting by
    the loop index instead would start with a shift of 0, which doubles
    the value, and would leave part of each row unfilled.

    Args:
        A: object supporting ``<<``, ``>>``, ``+`` and ``*`` (an EVA
            expression in this script).
        masking: mask multiplied into the row sums.
        nrows: unused; kept for a signature parallel to ``SumRows``.
        ncols: number of columns per row.
    """
    logn = int(np.log2(ncols))
    for i in range(logn):
        A = A + (A << (1 << i))
    A = A * masking
    for i in range(logn):
        A = A + (A >> (1 << i))
    return A

def define_LR(batch_sze, n_features, eta=0.1, gamma=0.9, n_iter=1):
    """Build the EVA program 'LR' applying ``n_iter`` unrolled update steps.

    Each step computes, on packed vectors of ``batch_sze * n_features`` slots:

        c = gamma * Z.T * sigmoid(-Z * v)
        w_out = v + c
        v_out = (1 - eta) * w_out + eta * w

    where sigmoid(-x) is replaced by the cubic ``c0 + c1*x + c2*x^3``.

    Args:
        batch_sze: number of rows of the packed data matrix ``Z``.
        n_features: number of columns of ``Z``.
        eta: mixing weight between the new ``w_out`` and the previous ``w``.
        gamma: step size multiplying the gradient term ``c``.
        n_iter: number of update steps traced into the program.

    Returns:
        The EvaProgram, with inputs 'Z', 'v', 'w', 'mask' and outputs
        'v_out', 'w_out'; input scales and output ranges are both set to 40.
    """
    def NAG_update(Z, v, w, mask):
        """Trace one update step and return ``(v_nxt, w_nxt)``."""
        ## sigmoid(-x) ~ c0 + c1*x + c2*x^3
        sigmoid_coeff = [0.5, -0.15012, 0.001593]
        c0, c1, c2 = sigmoid_coeff
        # The cubic is evaluated as c0 + (c2*ll) * (ll^2 + c1/c2).  The
        # (1 - eta) factor needed by v_nxt is folded into a second copy of
        # the constants (R, T) rather than applied to the result afterwards.
        R = ((1 - eta) * gamma * c2) * Z
        ll = SumCols(Z * v, mask, batch_sze, n_features)
        N = (gamma * c2) * Z

        S = R * ll
        # The linear coefficient c1 belongs here (not c0).
        Mt = ll * ll + (c1 / c2)
        O = N * ll

        T = ((1 - eta) * gamma * c0) * Z
        P = (gamma * c0) * Z
        # U = (1 - eta) * gamma * Z * (c0 + c1*ll + c2*ll^3)
        U = Mt * S + T

        # v_out = (1 - eta) * w_out + eta * w, so w is weighted by eta.
        Y = eta * w
        X = (1. - eta) * v
        # Q = gamma * Z * (c0 + c1*ll + c2*ll^3)
        Q = Mt * O + P
        v_nxt = X + Y + SumRows(U, batch_sze, n_features)
        w_nxt = v + SumRows(Q, batch_sze, n_features)
        return (v_nxt, w_nxt)

    prog = EvaProgram('LR', vec_size=batch_sze*n_features)
    with prog:
        Z = Input('Z', True)
        v = Input('v', True)
        w = Input('w', True)
        # mask is supplied as a plaintext (unencrypted) input.
        mask = Input('mask',  False)
        # The Python loop is executed while tracing, so the program contains
        # n_iter copies of the update step.
        for ii in range(n_iter):
            v, w = NAG_update(Z, v, w, mask)
        Output('v_out', v)
        Output('w_out', w)

    prog.set_input_scales(40)
    prog.set_output_ranges(40)
    return prog

## Plaintext
def NAG_update(Z, v, w, eta, gamma, n_iter=1):
    """Plaintext NumPy reference for the iterative LR update.

    Each iteration computes

        c = gamma * Z.T . neg_sig(Z . v)
        w_out = v + c
        v_out = (1 - eta) * w_out + eta * w

    and feeds ``(w_out, v_out)`` into the next iteration as ``(w, v)``.

    Args:
        Z: data matrix.
        v, w: current iterates; must be compatible with ``Z.dot(v)``.
        eta: mixing weight between ``w_out`` and the previous ``w``.
        gamma: step size multiplying the gradient term.
        n_iter: number of iterations; with 0 the inputs are returned as is.

    Returns:
        The tuple ``(w, v)`` after the last iteration.
    """
    ## sigmoid(-x)
    def neg_sig(x):
        # Exact form would be 1. / (1. + np.exp(x)); the cubic polynomial
        # below is used in its place.
        return 0.5 - 0.15012 * x + 0.001593 * (x**3)

    for _ in range(n_iter):
        ll = Z.dot(v)
        c = gamma * Z.T.dot(neg_sig(ll))
        w_out = v + c
        v_out = (1 - eta) * w_out + eta * w
        w, v = w_out, v_out
    return (w, v)

# Problem size: 32 slots arranged as a (batch_sze x n_features) = (4 x 8) matrix.
nslots = 32
n_features = 8
batch_sze = nslots // n_features
eta = 0.1
gamma = 0.1 / batch_sze

# Random test data, uniform in [-0.05, 0.05).
_Z = (np.random.rand(batch_sze, n_features) - 0.5) / 10.
_w = (np.random.rand(n_features, 1) - 0.5) / 10.
_v = (np.random.rand(n_features, 1) - 0.5) / 10.

n_iter = 2
# Plaintext reference result, compared against the decrypted outputs below.
w_out, v_out = NAG_update(Z=_Z, v=_v, w=_w, eta=eta, gamma=gamma, n_iter=n_iter)
encoder = Encoder(nslots)
# Mask with ones in the first column of every row and zeros elsewhere.
mask = np.zeros((batch_sze, n_features))
mask[:, 0] = 1.
# Pack the matrix, the two column vectors and the mask into flat slot arrays.
Z = encoder.encode_mat(_Z)
w = encoder.encode_vec(_w)
v = encoder.encode_vec(_v)
mask = encoder.encode_mat(mask)

# Trace the program and compile it for CKKS.
lr_program = define_LR(batch_sze, n_features, eta=eta, gamma=gamma, n_iter=n_iter)
compiler = CKKSCompiler({'security_level':'0', 'warn_vec_size':'false'})
compiled, params, signature = compiler.compile(lr_program)

# Generate keys and encrypt the inputs according to the compiled signature.
inputs = {'Z':Z, 'w':w, 'v':v, 'mask':mask}
public_ctx, secret_ctx = generate_keys(params)
enc_inputs = public_ctx.encrypt(inputs, signature)

# Run the compiled program on the encrypted inputs.
enc_outputs = public_ctx.execute(compiled, enc_inputs)

# Decrypt and print the mean absolute difference between the plaintext
# reference and the first n_features slots of each decrypted output.
outputs = secret_ctx.decrypt(enc_outputs, signature)
print(np.mean(np.abs(v_out.T - outputs['v_out'][:n_features])))
print(np.mean(np.abs(w_out.T - outputs['w_out'][:n_features])))

Olli Saarikivi · Answer 7 · Sat Apr 24 2021 03:36:25 GMT+0800 (China Standard Time)

Hi! This issue should be fixed in main now since merging in PR #9 from @sangeeta0201.