google-parfait / tensorflow-federated

An open-source framework for machine learning and other computations on decentralized data.

Geek Repo:Geek Repo

Github PK Tool:Github PK Tool

Facing error in "Learning Attribute" Please Help.

sanjoyghimire opened this issue · comments

While doing this

train_metrics = evaluation(metrics.state, federated_train_data)

I'm facing an error

AttributeError Traceback (most recent call last)
in <cell line: 1>()
----> 1 train_metrics = evaluation(metrics.state, federated_train_data)

AttributeError: 'collections.OrderedDict' object has no attribute 'state'

The whole code is:

!pip install tensorflow-federated

import numpy as np
import pandas as pd

from google.colab import drive
drive.mount('/content/drive')

import os

benign_df = pd.read_csv('/content/drive/MyDrive/Collab Input dataset/5.benign.csv')
g_c_df = pd.read_csv('/content/drive/MyDrive/Collab Input dataset/5.gafgyt.combo.csv')
g_j_df = pd.read_csv('/content/drive/MyDrive/Collab Input dataset/5.gafgyt.junk.csv')
g_s_df = pd.read_csv('/content/drive/MyDrive/Collab Input dataset/5.gafgyt.scan.csv')
g_t_df = pd.read_csv('/content/drive/MyDrive/Collab Input dataset/5.gafgyt.tcp.csv')
g_u_df = pd.read_csv('/content/drive/MyDrive/Collab Input dataset/5.gafgyt.udp.csv')
m_a_df = pd.read_csv('/content/drive/MyDrive/Collab Input dataset/5.mirai.ack.csv')
m_sc_df = pd.read_csv('/content/drive/MyDrive/Collab Input dataset/5.mirai.scan.csv')
m_sy_df = pd.read_csv('/content/drive/MyDrive/Collab Input dataset/5.mirai.syn.csv')
m_u_df = pd.read_csv('/content/drive/MyDrive/Collab Input dataset/5.mirai.udp.csv')
m_u_p_df = pd.read_csv('/content/drive/MyDrive/Collab Input dataset/5.mirai.udpplain.csv')

benign_df['type'] = 'benign'
m_u_df['type'] = 'mirai_udp'
g_c_df['type'] = 'gafgyt_combo'
g_j_df['type'] = 'gafgyt_junk'
g_s_df['type'] = 'gafgyt_scan'
g_t_df['type'] = 'gafgyt_tcp'
g_u_df['type'] = 'gafgyt_udp'
m_a_df['type'] = 'mirai_ack'
m_sc_df['type'] = 'mirai_scan'
m_sy_df['type'] = 'mirai_syn'
m_u_p_df['type'] = 'mirai_udpplain'

df = pd.concat([benign_df, m_u_df, g_c_df,
g_j_df, g_s_df, g_t_df,
g_u_df, m_a_df, m_sc_df,
m_sy_df, m_u_p_df],
axis=0, sort=False, ignore_index=True)

df["type"].value_counts()

from matplotlib import pyplot as plt

plt.title("Class Distribution")
df.groupby("type").size().plot(kind='pie', autopct='%.2f', figsize=(20,10))

df.info()

df = df.sample(frac=1).reset_index(drop=True)

df.head()

import random

num_client = 4

df["client"] = ["client_{}".format(random.randint(1, num_client)) for _ in range(df.shape[0])]

from sklearn.model_selection import train_test_split

train_df, test_df = train_test_split(df, test_size=0.2, random_state=42, stratify=df["type"])

features = list(train_df.columns)
features.remove("type")
features.remove("client")

from sklearn.preprocessing import LabelEncoder

label_encoder = LabelEncoder()
train_df["type"] = label_encoder.fit_transform(train_df["type"])
test_df["type"] = label_encoder.transform(test_df["type"])

from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
train_df[features] = scaler.fit_transform(train_df[features])
test_df[features] = scaler.transform(test_df[features])

train_df[features] = train_df[features].astype("float32")
test_df[features] = test_df[features].astype("float32")

train_df["type"] = train_df["type"].astype("int32")
test_df["type"] = test_df["type"].astype("int32")

import nest_asyncio
nest_asyncio.apply()

%load_ext tensorboard

import collections

import numpy as np
import tensorflow as tf
import tensorflow_federated as tff

np.random.seed(0)

tff.federated_computation(lambda: 'Hello, World!')()

client_id_colname = 'client'

client_ids = df[client_id_colname].unique()

train_client_ids = pd.DataFrame(client_ids).sample(frac=0.8).values.ravel().tolist()
test_client_ids = [x for x in client_ids if x not in train_client_ids]

train_client_ids

from collections import OrderedDict
from tensorflow.keras.utils import to_categorical

NUM_EPOCHS = 1
SHUFFLE_BUFFER = 100
PREFETCH_BUFFER = 5

def create_tf_dataset_for_client_fn(client_id):
client_data = dataframe[dataframe[client_id_colname] == client_id]
client_data_dict = OrderedDict()
client_data_dict["features"] = np.array(client_data[features].values, dtype="float32")
client_data_dict["label"] = np.array(client_data["type"].values, dtype="int32")

dataset = tf.data.Dataset.from_tensor_slices(client_data_dict)
dataset = dataset.shuffle(SHUFFLE_BUFFER).batch(1).repeat(NUM_EPOCHS)
return dataset

dataframe = train_df
train_data = tff.simulation.datasets.ClientData.from_clients_and_tf_fn(
client_ids=train_client_ids,
serializable_dataset_fn=create_tf_dataset_for_client_fn)

dataframe = test_df
test_data = tff.simulation.datasets.ClientData.from_clients_and_tf_fn(
client_ids=test_client_ids,
serializable_dataset_fn=create_tf_dataset_for_client_fn)

train_data.element_type_structure

test_data.element_type_structure

example_dataset = train_data.create_tf_dataset_for_client(train_data.client_ids[0])

example_element = next(iter(example_dataset))

example_element['label'].numpy()

from collections import defaultdict

f = plt.figure(figsize=(20, 10))
f.suptitle('Label Counts for a Sample of Clients')
for i, c_ids in enumerate(train_data.client_ids):
client_dataset = train_data.create_tf_dataset_for_client(c_ids)
plot_data = defaultdict(list)
for example in client_dataset:
label = example['label'].numpy()[0]
plot_data[label].append(label)
plt.subplot(2, 4, i+1)
plt.title('Client {}'.format(c_ids))
for j in range(10):
plt.hist(plot_data[j], density=False, bins=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])

import collections

NUM_EPOCHS = 5
BATCH_SIZE = 128
SHUFFLE_BUFFER = 100
PREFETCH_BUFFER = 10

def preprocess(dataset):
def batch_format_fn(element):
return collections.OrderedDict(x=tf.reshape(element['features'], [-1, len(features)]),
y=tf.reshape(element['label'], [-1, 1]))

return dataset.repeat(NUM_EPOCHS).shuffle(SHUFFLE_BUFFER, seed=1).batch(
  BATCH_SIZE).map(batch_format_fn).prefetch(PREFETCH_BUFFER)

preprocessed_example_dataset = preprocess(example_dataset)

sample_batch = tf.nest.map_structure(lambda x: x.numpy(),
next(iter(preprocessed_example_dataset)))

from tqdm import tqdm

def make_federated_data(client_data, client_ids):
return [preprocess(client_data.create_tf_dataset_for_client(x)) for x in tqdm(client_ids)]

NUM_CLIENTS = len(np.unique(train_df[client_id_colname]))

sample_clients = train_data.client_ids[0:NUM_CLIENTS]

federated_train_data = make_federated_data(train_data, sample_clients)

print('Number of client datasets: {l}'.format(l=len(federated_train_data)))
print('First dataset: {d}'.format(d=federated_train_data[0]))

def create_keras_model():
filters = 32
input_shape = (len(features))
num_classes = len(label_encoder.classes_)
clf = tf.keras.models.Sequential(
[
tf.keras.layers.Dense(64, input_dim=input_shape, activation='relu'),
tf.keras.layers.Dense(32, activation='relu'),
tf.keras.layers.Dropout(0.2),
tf.keras.layers.Dense(num_classes, activation='softmax')
])
return clf

keras_model = create_keras_model()
keras_model.summary()

import keras.backend as K

class F1Score(tf.keras.metrics.Metric):
def init(self, name='F1-Score', **kwargs):
super(F1Score, self).init(name=name, **kwargs)
self.f1_score = self.add_weight(name='f1_score', initializer='zeros')

def update_state(self, y_true, y_pred, sample_weight=None):
    true_positives = tf.math.reduce_sum(tf.math.round(tf.clip_by_value(y_true * y_pred, 0, 1)))

    possible_positives = tf.math.reduce_sum(tf.math.round(tf.clip_by_value(y_true, 0, 1)))
    recall = true_positives / (possible_positives + K.epsilon())

    predicted_positives = tf.math.reduce_sum(tf.math.round(tf.clip_by_value(y_pred, 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())

    self.f1_score.assign(2*((precision*recall)/(precision+recall+K.epsilon())))

def result(self):
    return self.f1_score

def reset_states(self):
    self.f1_score.assign(0.0)

from keras.metrics import Recall, Precision

def model_fn():
keras_model = tf.keras.models.Sequential([ tf.keras.layers.Dense(64, input_dim=115, activation='relu'),
tf.keras.layers.Dense(len(features), activation='relu'),
tf.keras.layers.Dropout(0.2),
tf.keras.layers.Dense(len(label_encoder.classes_), activation='softmax')]) # Your model structure
return tff.learning.models.from_keras_model(
keras_model=keras_model,
input_spec=preprocessed_example_dataset.element_spec, # Define the expected input format
loss=tf.keras.losses.SparseCategoricalCrossentropy(),
metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])

keras_model = create_keras_model()
keras_model.summary()

iterative_process = tff.learning.algorithms.build_weighted_fed_avg(
model_fn=model_fn,
client_optimizer_fn=lambda: tf.keras.optimizers.SGD(learning_rate=1),
server_optimizer_fn=lambda: tf.keras.optimizers.SGD(learning_rate=2))

str(iterative_process.initialize.type_signature)

state = iterative_process.initialize()

state, metrics = iterative_process.next(state, federated_train_data)
print('round 1, metrics={}'.format(metrics))

NUM_ROUNDS = 11
for round_num in range(2, NUM_ROUNDS):
state, metrics = iterative_process.next(state, federated_train_data)
print('round {:2d}, metrics={}'.format(round_num, metrics))

train_logdir = "training/"
os.makedirs(train_logdir, exist_ok=True)

test_logdir = "testing/"
os.makedirs(test_logdir, exist_ok=True)

summary_writer = tf.summary.create_file_writer(train_logdir)
state = iterative_process.initialize()

with summary_writer.as_default():
for round_num in range(1, NUM_ROUNDS):
state, metrics = iterative_process.next(state, federated_train_data)
client_work_metrics = metrics['client_work']
for name, value in client_work_metrics['train'].items():
tf.summary.scalar(name, value, step=round_num)

!ls {train_logdir}
%tensorboard --logdir {train_logdir} --port=0

ModelVariables = collections.namedtuple('ModelVariables', 'weights bias num_examples loss_sum accuracy_sum')

def create_model_variables():
return ModelVariables(
weights=tf.Variable(
lambda: tf.zeros(dtype=tf.float32, shape=(len(features), len(label_encoder.classes_))),
name='weights',
trainable=True),
bias=tf.Variable(
lambda: tf.zeros(dtype=tf.float32, shape=(len(label_encoder.classes_))),
name='bias',
trainable=True),
num_examples=tf.Variable(0.0, name='num_examples', trainable=False),
loss_sum=tf.Variable(0.0, name='loss_sum', trainable=False),
accuracy_sum=tf.Variable(0.0, name='accuracy_sum', trainable=False))

def predict_on_batch(variables, x):
return tf.nn.softmax(tf.matmul(x, variables.weights) + variables.bias)

def model_forward_pass(variables, batch):
y = predict_on_batch(variables, batch['x'])
predictions = tf.cast(tf.argmax(y, 1), tf.int32)

flat_labels = tf.reshape(batch['y'], [-1])
loss = -tf.reduce_mean(tf.reduce_sum(tf.one_hot(flat_labels, len(label_encoder.classes_)) * tf.math.log(y), axis=[1]))
accuracy = tf.reduce_mean(tf.cast(tf.equal(predictions, flat_labels), tf.float32))
num_examples = tf.cast(tf.size(batch['y']), tf.float32)
variables.num_examples.assign_add(num_examples)
variables.loss_sum.assign_add(loss * num_examples)
variables.accuracy_sum.assign_add(accuracy * num_examples)
return loss, predictions

def get_local_model_metrics(variables):
return collections.OrderedDict(
num_examples=variables.num_examples,
loss=variables.loss_sum / variables.num_examples,
accuracy=variables.accuracy_sum / variables.num_examples)

@tff.federated_computation
def aggregate_model_metrics_across_clients(metrics):
return collections.OrderedDict(
num_examples=tff.federated_sum(metrics.num_examples),
loss=tff.federated_mean(metrics.loss, metrics.num_examples),
accuracy=tff.federated_mean(metrics.accuracy, metrics.num_examples))

def reset_metrics(self):
self._variables.num_examples.assign(0)
self._variables.loss_sum.assign(0.0)
self._variables.accuracy_sum.assign(0.0)

from typing import Callable, List, OrderedDict

class IOTModel(tff.learning.models.VariableModel):
def reset_metrics(self):
self._variables.num_examples.assign(0)
self._variables.loss_sum.assign(0.0)
self._variables.accuracy_sum.assign(0.0)

def __init__(self):
    self._variables = create_model_variables()
@property
def trainable_variables(self):
    return [self._variables.weights, self._variables.bias]

@property
def non_trainable_variables(self):
    return []

@property
def local_variables(self):
    return [
        self._variables.num_examples, self._variables.loss_sum,
        self._variables.accuracy_sum
    ]

@property
def input_spec(self):
    return OrderedDict(
        x=tf.TensorSpec([None, len(features)], tf.float32),
        y=tf.TensorSpec([None, 1], tf.int32))

@tf.function
def predict_on_batch(self, x, training=True):
    del training
    return predict_on_batch(self._variables, x)

@tf.function
def forward_pass(self, batch, training=True):
    del training
    loss, predictions = model_forward_pass(self._variables, batch)
    num_exmaples = tf.shape(batch['x'])[0]
    return tff.learning.models.BatchOutput(loss=loss, predictions=predictions, num_examples=num_exmaples)

@tf.function
def report_local_outputs(self):
    return get_local_model_metrics(self._variables)

@property
def federated_output_computation(self):
    return aggregate_model_metrics_across_clients

@tf.function
def report_local_unfinalized_metrics(self) -> OrderedDict[str, List[tf.Tensor]]:
    """Creates an `OrderedDict` of metric names to unfinalized values."""
    return collections.OrderedDict(
        num_examples=[self._variables.num_examples],
        loss=[self._variables.loss_sum, self._variables.num_examples],
        accuracy=[self._variables.accuracy_sum, self._variables.num_examples])

def metric_finalizers(self) -> OrderedDict[str, Callable[[List[tf.Tensor]], tf.Tensor]]:
    """Creates an `OrderedDict` of metric names to finalizers."""
    return collections.OrderedDict(
        num_examples=tf.function(func=lambda x: x[0]),
        loss=tf.function(func=lambda x: x[0] / x[1]),
        accuracy=tf.function(func=lambda x: x[0] / x[1]))

iterative_process = tff.learning.algorithms.build_weighted_fed_avg (
IOTModel,
client_optimizer_fn=lambda: tf.keras.optimizers.Adam(learning_rate=0.001))

state = iterative_process.initialize()

state, metrics = iterative_process.next(state, federated_train_data)
print('round 1, metrics={}'.format(metrics))

for round_num in range(2, 11):
state, metrics = iterative_process.next(state, federated_train_data)
print('round {:2d}, metrics={}'.format(round_num, metrics))

evaluation = tff.learning.algorithms.build_weighted_fed_avg(IOTModel)

str(evaluation.get_model_weights) //Error is Here

train_metrics = evaluation(metrics.state, federated_train_data)

str(train_metrics)

NUM_CLIENTS = len(np.unique(test_df[client_id_colname]))

sample_clients = test_data.client_ids[0:NUM_CLIENTS]

federated_test_data = make_federated_data(test_data, sample_clients)

len(federated_test_data), federated_test_data[0]

test_metrics = evaluation(state.model, federated_test_data)

iot_model = IOTModel()
state.model.assign_weights_to(iot_model)
y_hat = iot_model.predict_on_batch(test_df[features])
y_hat = np.argmax(y_hat, axis=1)
y_hat

from sklearn.metrics import classification_report

print(classification_report(test_df["type"].values, y_hat))

Hello. If you can post a (1) minimal repro of your issue (it looks like you don't need all the dataset processing/pandas/matplotlib to repro) and (2) correctly formatted code (the fact that it goes in and out of markdown code formatting makes it really hard to read), it'll be a lot easier to debug.

That being said, based on the stack trace your error is not in the line you claim, it's int he following line:

train_metrics = evaluation(metrics.state, federated_train_data)

This isn't a TFF issue - your metrics are an ordered dictionary, and you're trying to access an attribute instead of a key.

Marking this as resolved for now - if you encounter an issue with TFF please let us know.