google-parfait / tensorflow-federated

An open-source framework for machine learning and other computations on decentralized data.

OperatorNotAllowedInGraphError Traceback (most recent call last)

MichaelAng3 opened this issue · comments

Script:

!pip install --force-reinstall 'jax==0.4.14' 'portpicker==1.6' 'typing-extensions==4.5.0' 'numpy==1.25' 'sqlalchemy==1.4.20' 'ml-dtypes==0.2.0' 'scipy==1.9.3'

!pip install --upgrade 'tensorflow-federated==0.64.0'

!pip install --upgrade tensorflow_federated

# Install TensorFlow version 2.14.0

!pip install --upgrade 'tensorflow==2.14.0'

# Install TensorFlow Federated version 0.64.0

!pip install --upgrade 'tensorflow-federated==0.64.0'

!pip show tensorflow_federated
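
A side note on the install cell (this note and the sketch are additions, not part of the original post): the unpinned `!pip install --upgrade tensorflow_federated` on the line after the 0.64.0 pin can pull in a newer release and override that pin. A single combined install, assuming TFF 0.64.0 with TensorFlow 2.14.0 is the intended pairing as the comments above indicate, avoids that:

# Sketch: pin both packages in one command so nothing upgrades them afterwards.
!pip install --upgrade 'tensorflow==2.14.0' 'tensorflow-federated==0.64.0'
!pip show tensorflow tensorflow_federated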

import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_federated as tff
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import re
import string
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn import svm
from xgboost import XGBClassifier
from sklearn.linear_model import SGDClassifier
import nltk
from google.colab import drive
drive.mount('/content/drive')

nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

def clean_text(text):
    text = text.lower()
    text = re.sub(r'https?://\S+|www\.\S+', '', text)
    text = re.sub(r'<.*?>+', '', text)
    text = re.sub('[%s]' % re.escape(string.punctuation), '', text)
    text = re.sub('\n', '', text)
    text = re.sub(r'\w*\d\w*', '', text)
    stop_words = set(nltk.corpus.stopwords.words('english'))
    words = nltk.tokenize.word_tokenize(text)
    return ' '.join([word for word in words if word not in stop_words])

def lemmatize_text(text):
    lemmatizer = nltk.stem.WordNetLemmatizer()
    return ' '.join([lemmatizer.lemmatize(word) for word in text.split()])

def load_dataset(file_path):
    data = pd.read_csv(file_path)
    data = data[['tweet', 'class']]
    data['tweet'] = data['tweet'].apply(clean_text).apply(lemmatize_text)
    return data
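
For readers who do not have the file, the columns referenced here suggest (an inference from the code, not something stated in the post) that labeled_data.csv contains a text column 'tweet' and an integer 'class' column with three label values, matching the Dense(3, activation='softmax') output layer used later. A quick way to confirm that before training (sketch):

# Sketch: peek at the assumed 'tweet' / 'class' structure of the CSV.
raw = pd.read_csv('/content/drive/My Drive/Colab Notebooks/labeled_data.csv')
print(raw[['tweet', 'class']].head())
print(raw['class'].value_counts())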

def model_fn():
    print("Building model...")
    # Define Keras model without compiling
    keras_model = Sequential([
        Embedding(input_dim=10000, output_dim=64, input_length=100),
        LSTM(64),
        Dense(64, activation='relu'),
        Dense(3, activation='softmax')
    ])

    # Define input_spec as per your data structure
    input_spec = (tf.TensorSpec(shape=[None, 100], dtype=tf.int32),
                  tf.TensorSpec(shape=[None], dtype=tf.int32))

    # Define loss and metrics outside of keras_model.compile
    loss = tf.keras.losses.SparseCategoricalCrossentropy()
    metrics = [tf.keras.metrics.SparseCategoricalAccuracy()]
    print("Model built.")
    # Use the from_keras_model function as per TFF documentation
    return tff.learning.models.from_keras_model(
        keras_model=keras_model,
        loss=loss,
        input_spec=input_spec,
        metrics=metrics
    )
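
One thing worth double-checking here (this note and the sketch below are additions, not part of the original post): the hand-written input_spec may not match the dtypes of the client datasets built later, since pad_sequences emits int32 features while labels taken from a pandas integer column via .values are usually int64. A pattern used in the TFF tutorials is to derive input_spec from an actual example dataset; a drop-in alternative definition of model_fn could look like this:

# Sketch (an assumption, not the original code): build one stand-in client
# dataset with the same structure that create_federated_data() produces
# further down, and take input_spec from its element_spec so dtypes and
# shapes cannot drift apart.
sample_features = np.zeros((4, 100), dtype=np.int32)
sample_labels = np.zeros((4,), dtype=np.int64)
example_dataset = tf.data.Dataset.from_tensor_slices((sample_features, sample_labels)).batch(20)

def model_fn():
    keras_model = Sequential([
        Embedding(input_dim=10000, output_dim=64, input_length=100),
        LSTM(64),
        Dense(64, activation='relu'),
        Dense(3, activation='softmax')
    ])
    return tff.learning.models.from_keras_model(
        keras_model=keras_model,
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        input_spec=example_dataset.element_spec,
        metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])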

# Ensure TensorFlow runs in eager execution mode

tf.config.run_functions_eagerly(True)

# Enable eager execution for tf.data functions

tf.data.experimental.enable_debug_mode()

# Build the federated averaging process without tf.function

def build_federated_averaging_process():
    print("Building federated averaging process...")
    process = tff.learning.algorithms.build_weighted_fed_avg(
        model_fn=model_fn,
        client_optimizer_fn=lambda: tf.keras.optimizers.SGD(learning_rate=0.02),
        server_optimizer_fn=lambda: tf.keras.optimizers.SGD(learning_rate=1.0)
    )
    print("Federated averaging process built.")
    return process
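
If it helps with debugging, the process returned by build_weighted_fed_avg exposes type signatures for its initialize and next computations; printing them shows, among other things, the federated type it expects for the client data (a small sketch added here, not part of the original script):

# Sketch: inspect the federated type signatures of the built learning process.
debug_process = build_federated_averaging_process()
print(debug_process.initialize.type_signature)
print(debug_process.next.type_signature)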

data = load_dataset('/content/drive/My Drive/Colab Notebooks/labeled_data.csv')
train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)

tokenizer = Tokenizer(num_words=10000)
tokenizer.fit_on_texts(train_data['tweet'])

def create_federated_data(data, tokenizer, num_clients=10):
    client_data_splits = np.array_split(data, num_clients)
    federated_data = []

    for client_data in client_data_splits:
        # Tokenizing and padding sequences
        features = pad_sequences(tokenizer.texts_to_sequences(client_data['tweet']), maxlen=100)
        labels = client_data['class'].values

        # Creating tensor slices
        client_dataset = tf.data.Dataset.from_tensor_slices((features, labels)).batch(20)
        federated_data.append(client_dataset)

    return federated_data

# Ensure TensorFlow runs in eager execution mode

tf.config.run_functions_eagerly(True)

# Enable eager execution for tf.data functions

tf.data.experimental.enable_debug_mode()

federated_train_data = create_federated_data(train_data, tokenizer, num_clients=10)
print("Federated training data created.")
iterative_process = build_federated_averaging_process()
print("Iterative process initialized.")
state = iterative_process.initialize()
print("State initialized.")

NUM_ROUNDS = 5
for round_num in range(1, NUM_ROUNDS + 1):
    print(f"Starting round {round_num}...")
    state, metrics = iterative_process.next(state, federated_train_data)
    print('Round {:2d}, Metrics: {}'.format(round_num, metrics))
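
As a side note on reading the output (an addition, not from the original post): with build_weighted_fed_avg the metrics returned by next() are nested, and the per-round training metrics typically live under 'client_work' -> 'train':

# Sketch (assumes the rounds above ran): pull out the client training metrics.
train_metrics = metrics['client_work']['train']
print(train_metrics['loss'], train_metrics['sparse_categorical_accuracy'])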

# Prepare the data for traditional machine learning models

# Using the same train_data and test_data from above

# Preprocess the data

corpus = [clean_text(t) for t in train_data['tweet']]
corpus = [lemmatize_text(t) for t in corpus]

# Vectorize the text

vectorizer = CountVectorizer(max_features=2000)
X = vectorizer.fit_transform(corpus).toarray()

# Split the vectorized text data into training and test sets

X_train, X_test, y_train_ml, y_test_ml = train_test_split(X, train_data['class'].values, test_size=0.2, random_state=0)

# Train various classifiers and evaluate them

classifiers = {
    "Naive Bayes": GaussianNB(),
    "Decision Tree": DecisionTreeClassifier(criterion='entropy', random_state=0),
    "KNN": KNeighborsClassifier(n_neighbors=5, metric='minkowski', p=2),
    "Logistic Regression": LogisticRegression(random_state=0),
    "Random Forest": RandomForestClassifier(n_estimators=10, criterion='entropy', random_state=0),
    "SVM": svm.SVC(),
    "XGBoost": XGBClassifier(),
    "SGD": SGDClassifier(random_state=0)
}

# Dictionary to store accuracies

accuracies = {}

for classifier_name, classifier in classifiers.items():
    classifier.fit(X_train, y_train_ml)
    y_pred = classifier.predict(X_test)
    accuracies[classifier_name] = accuracy_score(y_test_ml, y_pred)
    print(f"{classifier_name} Accuracy: {accuracies[classifier_name]}")

# Visualize the classifier accuracies with percentages

plt.figure(figsize=(14, 7))
barlist = plt.bar(range(len(accuracies)), list(accuracies.values()), align='center', tick_label=list(accuracies.keys()))
plt.xticks(range(len(accuracies)), list(accuracies.keys()), rotation=90) # Rotate names to prevent cutting off

# Annotate each bar with the percentage value

for idx, rect in enumerate(barlist):
    height = rect.get_height()
    plt.text(rect.get_x() + rect.get_width()/2., 1.005*height,
             f'{list(accuracies.values())[idx]:.2%}', ha='center', va='bottom', rotation=0)

plt.tight_layout() # Adjust layout to prevent cutting off
plt.title('Comparison of Classifier Accuracies')
plt.ylabel('Accuracy')
plt.show()

Error:
OperatorNotAllowedInGraphError Traceback (most recent call last)
in <cell line: 137>()
135 federated_train_data = create_federated_data(train_data, tokenizer, num_clients=10)
136 print("Federated training data created.")
--> 137 iterative_process = build_federated_averaging_process()
138 print("Iterative process initialized.")
139 state = iterative_process.initialize()

13 frames
/usr/local/lib/python3.10/dist-packages/tensorflow_federated/python/learning/templates/model_delta_client_work.py in _choose_client_weight(weighting, has_non_finite_delta, num_examples)
233
234 def _choose_client_weight(weighting, has_non_finite_delta, num_examples):
--> 235 if has_non_finite_delta > 0:
236 return tf.constant(0.0, tf.float32)
237 else:

OperatorNotAllowedInGraphError: Using a symbolic tf.Tensor as a Python bool is not allowed. You can attempt the following resolutions to the problem: If you are running in Graph mode, use Eager execution mode or decorate this function with @tf.function. If you are using AutoGraph, you can try decorating this function with @tf.function. If that does not work, then you may be using an unsupported feature or your source code may not be visible to AutoGraph. See https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/autograph/g3doc/reference/limitations.md#access-to-source-code for more information.

TensorFlow Federated version: 0.64.0. I tried 0.65.0 as well, but got the same error.
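
For what it's worth, the failing frame (`if has_non_finite_delta > 0:` in model_delta_client_work.py) is TFF-internal code being traced into a graph, and the two global switches set before building the process, tf.config.run_functions_eagerly(True) and tf.data.experimental.enable_debug_mode(), look like plausible culprits: they can keep AutoGraph from converting that if statement, leaving a symbolic tensor to be used as a Python bool. A guess at a workaround, not a confirmed fix (and it assumes a fresh Colab runtime, since the tf.data debug mode may not be reversible within a session):

# Sketch: build and run the process with TensorFlow's default tracing
# behaviour, i.e. without calling tf.config.run_functions_eagerly(True) or
# tf.data.experimental.enable_debug_mode() beforehand.
tf.config.run_functions_eagerly(False)
iterative_process = build_federated_averaging_process()
state = iterative_process.initialize()
for round_num in range(1, NUM_ROUNDS + 1):
    state, metrics = iterative_process.next(state, federated_train_data)
    print('Round {:2d}, Metrics: {}'.format(round_num, metrics))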