microsoft / EVA

Compiler for the SEAL homomorphic encryption library

Geek Repo:Geek Repo

Github PK Tool:Github PK Tool

Graph with Iteration-loop

fionser opened this issue · comments

commented

Hi,

I am writing a logistic regression model training using EVA.
I have some questions on "updating encrypted LR model"

Basically, we need to update the LR model like `[w] <- [w] + gamma * [g]` in a for-loop.
But I encountered a "Bad variant access" error.

Hi! Thank you for filing an issue. A "Bad variant access" is most likely a bug in EVA. Would you be able to share a Python script with an EVA program that triggers this bug?

Having loops in PyEVA programs should not be a problem. The first compilation step from PyEVA into EVA IR uses a tracing approach that will unroll any loops into a DAG of instructions.

commented
from eva import *
from eva.ckks import CKKSCompiler
from eva.seal import generate_keys
from eva.metric import valuation_mse
import numpy as np

class Encoder:
    """Pack NumPy matrices and vectors into flat vectors of ``nslots`` values.

    A matrix is stored row by row.  A vector is replicated so that it lines
    up element-wise with such a flattened matrix: a column vector is tiled
    (``[a, b, a, b, ...]``) and a row vector has each entry repeated
    (``[a, a, ..., b, b, ...]``).
    """

    def __init__(self, nslots):
        # Length of every flat vector produced by this encoder.
        self.nslots = nslots

    def encode_mat(self, A):
        """Flatten the 2-D array ``A`` row by row.

        Raises AssertionError unless ``A`` has exactly ``nslots`` elements.
        """
        nr, nc = A.shape
        assert nr * nc == self.nslots
        return A.reshape(nr * nc)

    def encode_vec(self, vec):
        """Encode a 1-D array, an ``(n, 1)`` column or a ``(1, m)`` row.

        1-D input is treated as a column vector.  Raises AssertionError for
        a 2-D array whose shape has no dimension equal to 1.
        """
        if len(vec.shape) == 1:
            return self._encode_col_vec(vec)
        n, m = vec.shape
        assert n == 1 or m == 1
        if n == 1:
            return self._encode_row_vec(vec)
        return self._encode_col_vec(vec)

    def _encode_row_vec(self, vec):
        """Repeat every entry of the ``(1, n)`` row ``nslots // n`` times."""
        n = vec.shape[1]
        nrep = self.nslots // n
        ret = np.zeros(self.nslots)
        # Slots past n * nrep (when n does not divide nslots) stay zero.
        ret[:n * nrep] = np.repeat(vec[0], nrep)
        return ret

    def _encode_col_vec(self, vec):
        """Tile the length-``n`` column ``nslots // n`` times back to back."""
        n = vec.shape[0]
        nrep = self.nslots // n
        ret = np.zeros(self.nslots)
        # Slots past n * nrep (when n does not divide nslots) stay zero.
        ret[:n * nrep] = np.tile(np.ravel(vec), nrep)
        return ret

def define_LR(batch_sze, n_features, eta=0.1, gamma=0.9, n_iter=1):
    """Build the EVA program that runs ``n_iter`` encrypted LR update steps.

    All operands are packed vectors of ``batch_sze * n_features`` slots and
    every step evaluates

        c     = gamma * Z.T * sigmoid(-Z * v)
        w_out = v + c
        v_out = (1 - eta) * w_out + eta * w

    with sigmoid(-x) replaced by the cubic ``c0 + c1*x + c3*x^3``.

    Parameters:
        batch_sze:  number of rows of the packed data matrix ``Z``.
        n_features: number of columns of ``Z``.
        eta, gamma: scalars of the update rule above.
        n_iter:     number of update steps unrolled into the program.

    Returns the EvaProgram with inputs 'Z', 'v', 'w', 'mask' and outputs
    'v_out', 'w_out'.  ``SumCols`` and ``SumRows`` must be in scope.
    """
    # sigmoid(-x) ~ c0 + c1*x + c3*x^3
    c0, c1, c3 = 0.5, -0.15012, 0.001593

    def NAG_update(Z, v, w, mask):
        # ll holds <Z[i, :], v> replicated over every slot of row i.
        ll = SumCols(Z * v, mask, batch_sze, n_features)
        # c1*ll + c3*ll^3 == c3 * ll * (ll^2 + c1/c3); Mt is the bracket.
        Mt = ll * ll + (c1 / c3)

        # Q = gamma * Z * sigmoid(-ll), with the scalars folded into Z.
        N = (gamma * c3) * Z
        O = N * ll
        P = (gamma * c0) * Z
        Q = Mt * O + P

        # U = (1 - eta) * Q, again folded into the plaintext factors of Z.
        R = ((1 - eta) * gamma * c3) * Z
        S = R * ll
        T = ((1 - eta) * gamma * c0) * Z
        U = Mt * S + T

        # w_out = v + c
        w_nxt = v + SumRows(Q, batch_sze, n_features)
        # v_out = (1 - eta) * (v + c) + eta * w
        X = (1. - eta) * v
        Y = eta * w
        v_nxt = X + Y + SumRows(U, batch_sze, n_features)
        return (v_nxt, w_nxt)

    prog = EvaProgram('LR', vec_size=batch_sze*n_features)
    with prog:
        Z = Input('Z', True)
        v = Input('v', True)
        w = Input('w', True)
        # NOTE(review): the second argument presumably selects encrypted
        # (True) vs plaintext (False); per the report the error goes away
        # when `mask` is passed as encrypted.
        mask = Input('mask', False)
        for _ in range(n_iter):
            v, w = NAG_update(Z, v, w, mask)
        Output('v_out', v)
        Output('w_out', w)

    prog.set_input_scales(40)
    prog.set_output_ranges(40)
    return prog

# Problem size: the nslots packed values are viewed as a
# batch_sze x n_features matrix.
nslots = 32
n_features = 8
batch_sze = nslots // n_features
eta = 0.1
gamma = 0.1 / batch_sze

# Random test data, uniform in [-0.05, 0.05).
_Z = (np.random.rand(batch_sze, n_features) - 0.5) / 10.
_w = (np.random.rand(n_features, 1) - 0.5) / 10.
_v = (np.random.rand(n_features, 1) - 0.5) / 10.

n_iter = 2
# NOTE(review): this needs a module-level plaintext
# NAG_update(Z, v, w, eta, gamma, n_iter); the NAG_update nested inside
# define_LR takes (Z, v, w, mask) and is not visible at this scope.
w_out, v_out = NAG_update(Z=_Z, v=_v, w=_w, eta=eta, gamma=gamma, n_iter=n_iter)
encoder = Encoder(nslots)
# Mask with 1 in column 0 of every row and 0 elsewhere.
mask = np.zeros((batch_sze, n_features))
mask[:, 0] = 1.
# Pack everything into flat vectors of nslots values.
Z = encoder.encode_mat(_Z)
w = encoder.encode_vec(_w)
v = encoder.encode_vec(_v)
mask = encoder.encode_mat(mask)

# Build the EVA program and compile it for CKKS.
lr_program = define_LR(batch_sze, n_features, eta=eta, gamma=gamma, n_iter=n_iter)
compiler = CKKSCompiler({'security_level':'0', 'warn_vec_size':'false'})
compiled, params, signature = compiler.compile(lr_program)

# Keys are generated from the compiled parameters; inputs are keyed by the
# names declared with Input(...) in define_LR.
inputs = {'Z':Z, 'w':w, 'v':v, 'mask':mask}
public_ctx, secret_ctx = generate_keys(params)
enc_inputs = public_ctx.encrypt(inputs, signature)
enc_outputs = public_ctx.execute(compiled, enc_inputs)
outputs = secret_ctx.decrypt(enc_outputs, signature)
commented
  • I found that, when all the inputs are encrypted, the program seems to work.
  • However, when I switch some of the inputs to plaintext, the "Bad variant access" error is raised.

Great, thank you. The current iteration of mixed ciphertext/plaintext inputs, where plaintext encoding is delayed to program execution time, is a newer feature. I will look into this.

Could you share the full source code? We are missing for example the SumCols and SumRows functions. Also it seems there is some issue with how NAG_update is called in the code: it is called with 7 parameters but the function only takes 4.

commented
from eva import *
from eva.ckks import CKKSCompiler
from eva.seal import generate_keys
from eva.metric import valuation_mse
import numpy as np

class Encoder:
    """Pack NumPy matrices and vectors into flat vectors of ``nslots`` values.

    A matrix is stored row by row.  A vector is replicated so that it lines
    up element-wise with such a flattened matrix: a column vector is tiled
    (``[a, b, a, b, ...]``) and a row vector has each entry repeated
    (``[a, a, ..., b, b, ...]``).  ``MTx`` and ``xTMT`` are NumPy versions of
    the two matrix-vector products, usable on either 2-D or packed operands.
    """

    def __init__(self, nslots):
        # Length of every flat vector produced by this encoder.
        self.nslots = nslots

    def encode_mat(self, A):
        """Flatten the 2-D array ``A`` row by row.

        Raises AssertionError unless ``A`` has exactly ``nslots`` elements.
        """
        nr, nc = A.shape
        assert nr * nc == self.nslots
        return A.reshape(nr * nc)

    def encode_vec(self, vec):
        """Encode a 1-D array, an ``(n, 1)`` column or a ``(1, m)`` row.

        1-D input is treated as a column vector.  Raises AssertionError for
        a 2-D array whose shape has no dimension equal to 1.
        """
        if len(vec.shape) == 1:
            return self._encode_col_vec(vec)
        n, m = vec.shape
        assert n == 1 or m == 1
        if n == 1:
            return self._encode_row_vec(vec)
        return self._encode_col_vec(vec)

    def _encode_row_vec(self, vec):
        """Repeat every entry of the ``(1, n)`` row ``nslots // n`` times."""
        n = vec.shape[1]
        nrep = self.nslots // n
        ret = np.zeros(self.nslots)
        # Slots past n * nrep (when n does not divide nslots) stay zero.
        ret[:n * nrep] = np.repeat(vec[0], nrep)
        return ret

    def _encode_col_vec(self, vec):
        """Tile the length-``n`` column ``nslots // n`` times back to back."""
        n = vec.shape[0]
        nrep = self.nslots // n
        ret = np.zeros(self.nslots)
        # Slots past n * nrep (when n does not divide nslots) stay zero.
        ret[:n * nrep] = np.tile(np.ravel(vec), nrep)
        return ret

    def show_mat(self, A, nrows=0):
        """Print the flat vector ``A`` as a matrix with ``nrows`` rows."""
        assert nrows > 0
        ncols = A.shape[0] // nrows
        print(A.reshape(nrows, ncols))

    def show_col_vec(self, vec, sze):
        """Print the first ``sze`` entries of ``vec``."""
        print(vec[:sze])

    def show_row_vec(self, vec, sze):
        """Print the first entry of each of ``sze`` equal chunks of ``vec``."""
        print(vec.reshape(sze, -1)[:, 0])

    def MTx(self, M, x, nrows=0):
        """M, x -> M.T * x

        With a 2-D ``M`` this is ``M.T.dot(x)``.  With a flat ``M`` (and an
        equally long flat ``x``) the element-wise product is summed over the
        ``nrows`` rows by adding copies rolled by ``ncols * 2**i`` slots for
        ``i < int(log2(nrows))``; ``nrows`` must then be given and positive.
        """
        if len(M.shape) > 1:
            return M.T.dot(x)
        assert M.shape[0] == x.shape[0]
        assert nrows > 0
        ncols = M.shape[0] // nrows
        prod = M * x
        for i in range(int(np.log2(nrows))):
            prod = prod + np.roll(prod, -ncols * (1 << i))
        return prod

    def xTMT(self, M, x, nrows=0):
        """x, M -> x.T * M.T

        With a 2-D ``M`` this is ``M.dot(x).T``.  With a flat ``M`` (and an
        equally long flat ``x``) each row of the element-wise product is
        summed into its column-0 slot, everything else is masked out, and
        the sum is copied back over the row; ``nrows`` must then be given
        and positive, and ``nrows * ncols`` must equal ``nslots``.
        """
        if len(M.shape) > 1:
            return M.dot(x).T
        assert M.shape[0] == x.shape[0]
        assert nrows > 0
        ncols = M.shape[0] // nrows
        steps = int(np.log2(ncols))
        prod = M * x
        # Accumulate each row towards its first column.
        for i in range(steps):
            prod = prod + np.roll(prod, -(1 << i))

        # Keep only column 0, where the complete row sum now lives.
        first_col = np.zeros((nrows, ncols))
        first_col[:, 0] = 1.
        prod = prod * self.encode_mat(first_col)
        # Spread the row sum back over the whole row.
        for i in range(steps):
            prod = prod + np.roll(prod, 1 << i)
        return prod


def SumRows(A, nrows, ncols):
    """Fold ``A`` onto itself ``int(log2(nrows))`` times.

    Each round adds to the running value a copy of itself shifted left
    (``<<``) by ``ncols``, ``2 * ncols``, ``4 * ncols``, ... positions, and
    the final running value is returned.
    """
    rounds = int(np.log2(nrows))
    total = A
    shift = ncols
    for _ in range(rounds):
        total = total + (total << shift)
        shift = shift * 2
    return total

def SumCols(A, masking, nrows, ncols):
    """Sum each row of a packed matrix and spread the sum over that row.

    Three phases, all using ``int(log2(ncols))`` rounds:

    1. add copies of ``A`` shifted left by 1, 2, 4, ... so that the first
       slot of every row accumulates the sum of its ``ncols`` entries;
    2. multiply by ``masking`` (expected to be 1 in the first slot of each
       row and 0 elsewhere) to discard the partial sums in the other slots;
    3. add copies shifted right by 1, 2, 4, ... to replicate the surviving
       sum across the row.

    Parameters:
        A:       packed operand supporting ``<<``, ``>>``, ``+`` and ``*``.
        masking: packed column-0 mask, see phase 2.
        nrows:   unused; kept for a signature parallel to ``SumRows``.
        ncols:   row length; a power of two is assumed by the doubling.
    """
    logn = int(np.log2(ncols))
    for i in range(logn):
        A = A + (A << (1 << i))
    A = A * masking
    for i in range(logn):
        # The shift must double every round (1, 2, 4, ...).  Shifting by the
        # bare loop index starts with a shift of 0, which only doubles the
        # value, and never reaches the far end of the row.
        A = A + (A >> (1 << i))
    return A

def define_LR(batch_sze, n_features, eta=0.1, gamma=0.9, n_iter=1):
    """Build the EVA program that runs ``n_iter`` encrypted LR update steps.

    All operands are packed vectors of ``batch_sze * n_features`` slots and
    every step evaluates

        c     = gamma * Z.T * sigmoid(-Z * v)
        w_out = v + c
        v_out = (1 - eta) * w_out + eta * w

    with sigmoid(-x) replaced by the cubic ``c0 + c1*x + c3*x^3``.

    Parameters:
        batch_sze:  number of rows of the packed data matrix ``Z``.
        n_features: number of columns of ``Z``.
        eta, gamma: scalars of the update rule above.
        n_iter:     number of update steps unrolled into the program.

    Returns the EvaProgram with inputs 'Z', 'v', 'w', 'mask' and outputs
    'v_out', 'w_out'.
    """
    # sigmoid(-x) ~ c0 + c1*x + c3*x^3
    c0, c1, c3 = 0.5, -0.15012, 0.001593

    def NAG_update(Z, v, w, mask):
        # ll holds <Z[i, :], v> replicated over every slot of row i.
        ll = SumCols(Z * v, mask, batch_sze, n_features)
        # c1*ll + c3*ll^3 == c3 * ll * (ll^2 + c1/c3); Mt is the bracket.
        Mt = ll * ll + (c1 / c3)

        # Q = gamma * Z * sigmoid(-ll), with the scalars folded into Z.
        N = (gamma * c3) * Z
        O = N * ll
        P = (gamma * c0) * Z
        Q = Mt * O + P

        # U = (1 - eta) * Q, again folded into the plaintext factors of Z.
        R = ((1 - eta) * gamma * c3) * Z
        S = R * ll
        T = ((1 - eta) * gamma * c0) * Z
        U = Mt * S + T

        # w_out = v + c
        w_nxt = v + SumRows(Q, batch_sze, n_features)
        # v_out = (1 - eta) * (v + c) + eta * w
        X = (1. - eta) * v
        Y = eta * w
        v_nxt = X + Y + SumRows(U, batch_sze, n_features)
        return (v_nxt, w_nxt)

    prog = EvaProgram('LR', vec_size=batch_sze*n_features)
    with prog:
        Z = Input('Z', True)
        v = Input('v', True)
        w = Input('w', True)
        # NOTE(review): the second argument presumably selects encrypted
        # (True) vs plaintext (False); the reported error appears only when
        # some input, such as `mask` here, is plaintext.
        mask = Input('mask', False)
        for _ in range(n_iter):
            v, w = NAG_update(Z, v, w, mask)
        Output('v_out', v)
        Output('w_out', w)

    prog.set_input_scales(40)
    prog.set_output_ranges(40)
    return prog

## Plaintext
def NAG_update(Z, v, w, eta, gamma, n_iter=1):
    """Plaintext NumPy reference for the LR update steps.

    Each of the ``n_iter`` iterations computes

        c     = gamma * Z.T.dot(neg_sig(Z.dot(v)))
        w_out = v + c
        v_out = (1 - eta) * w_out + eta * w

    and then continues with ``w, v = w_out, v_out``.

    Parameters:
        Z:      2-D array multiplied with ``v`` via ``Z.dot(v)``.
        v, w:   arrays holding the two state vectors.
        eta:    mixing weight between ``w_out`` and the previous ``w``.
        gamma:  scale applied to the correction ``c``.
        n_iter: number of iterations; 0 returns the inputs unchanged.

    Returns the tuple ``(w, v)`` after the last iteration.
    """
    def neg_sig(x):
        # Cubic approximation of sigmoid(-x); the exact expression would be
        #     1. / (1. + np.exp(x))
        return 0.5 - 0.15012 * x + 0.001593 * (x**3)

    for _ in range(n_iter):
        ll = Z.dot(v)
        c = gamma * Z.T.dot(neg_sig(ll))
        w_out = v + c
        v_out = (1 - eta) * w_out + eta * w
        w, v = w_out, v_out
    return (w, v)

# Problem size: the nslots packed values are viewed as a
# batch_sze x n_features matrix.
nslots = 32
n_features = 8
batch_sze = nslots // n_features
eta = 0.1
gamma = 0.1 / batch_sze

# Random test data, uniform in [-0.05, 0.05).
_Z = (np.random.rand(batch_sze, n_features) - 0.5) / 10.
_w = (np.random.rand(n_features, 1) - 0.5) / 10.
_v = (np.random.rand(n_features, 1) - 0.5) / 10.

n_iter = 2
# Plaintext reference result, compared against the decrypted outputs below.
w_out, v_out = NAG_update(Z=_Z, v=_v, w=_w, eta=eta, gamma=gamma, n_iter=n_iter)
encoder = Encoder(nslots)
# Mask with 1 in column 0 of every row and 0 elsewhere.
mask = np.zeros((batch_sze, n_features))
mask[:, 0] = 1.
# Pack everything into flat vectors of nslots values.
Z = encoder.encode_mat(_Z)
w = encoder.encode_vec(_w)
v = encoder.encode_vec(_v)
mask = encoder.encode_mat(mask)

# Build the EVA program and compile it for CKKS.
lr_program = define_LR(batch_sze, n_features, eta=eta, gamma=gamma, n_iter=n_iter)
compiler = CKKSCompiler({'security_level':'0', 'warn_vec_size':'false'})
compiled, params, signature = compiler.compile(lr_program)

# Keys are generated from the compiled parameters; inputs are keyed by the
# names declared with Input(...) in define_LR.
inputs = {'Z':Z, 'w':w, 'v':v, 'mask':mask}
public_ctx, secret_ctx = generate_keys(params)
enc_inputs = public_ctx.encrypt(inputs, signature)

enc_outputs = public_ctx.execute(compiled, enc_inputs)

outputs = secret_ctx.decrypt(enc_outputs, signature)
# Mean absolute difference between the plaintext reference and the first
# n_features decrypted slots of each output.
print(np.mean(np.abs(v_out.T - outputs['v_out'][:n_features])))
print(np.mean(np.abs(w_out.T - outputs['w_out'][:n_features])))

Hi! This issue should be fixed in main now since merging in PR #9 from @sangeeta0201.