How to create custom component for sentiment analyzer in Rasa 3.X.
shreyashgupta68 opened this issue · comments
Hi everyone,
First of all, thanks for this amazing project. I am working on a sentiment analyzer and am having some trouble understanding how custom components work in Rasa 3.x. I need to use a naive Bayes classifier to train a sentiment model on my NLU training examples. Does anyone know how to do this? Below is the custom sentiment analyzer component that I have written for Rasa **3.1.0**.
import logging
from typing import Any, Text, Dict, List
from joblib import dump, load
from nltk.classify import NaiveBayesClassifier
from rasa.engine.recipes.default_recipe import DefaultV1Recipe
from rasa.engine.graph import ExecutionContext, GraphComponent
from rasa.engine.storage.resource import Resource
from rasa.engine.storage.storage import ModelStorage
from rasa.shared.nlu.training_data.training_data import TrainingData
from rasa.shared.nlu.training_data.message import Message
from rasa.shared.nlu.constants import (TEXT)
# Module-level logger, named after this module so log records can be
# filtered per component. (`name` alone is undefined at this point and
# would raise NameError on import.)
logger = logging.getLogger(__name__)
# Registered as an entity extractor (not a tokenizer): the component does not
# produce tokens, it attaches a "sentiment" entity to each message.
@DefaultV1Recipe.register(
    DefaultV1Recipe.ComponentType.ENTITY_EXTRACTOR, is_trainable=True
)
class DemoSentiment(GraphComponent):
    """Naive Bayes sentiment analyzer exposed as a Rasa graph component.

    Training builds an NLTK ``NaiveBayesClassifier`` from the text of the
    intent examples paired, line by line, with the labels stored in a labels
    file (``labels.txt`` unless the ``labels_file`` config key says otherwise).
    At inference time the predicted label is attached to every message as an
    entity named ``"sentiment"``.
    """

    name = "sentiment"
    provides = ["entities"]
    requires = ["tokens"]
    defaults = {}
    language_list = ["en"]

    def __init__(
        self,
        config: Dict[Text, Any],
        name: Text,
        model_storage: ModelStorage,
        resource: Resource,
    ) -> None:
        """Store the configuration and the persistence handles.

        Args:
            config: Component configuration; only ``labels_file`` is read.
            name: Name given to this component instance (the graph node name
                when built through ``create``/``load``).
            model_storage: Storage used to persist the trained classifier.
            resource: Resource under which the classifier is persisted.
        """
        self.name = name
        self._config = config or {}
        # Stays None until `train` or `load` supplies a classifier, so that
        # `process` can detect an untrained component instead of failing
        # with AttributeError.
        self.clf = None
        # We need to use these later when saving the trained component.
        self._model_storage = model_storage
        self._resource = resource

    def preprocessing(self, tokens):
        """Create the bag-of-words feature dict ``{word: True}``.

        Args:
            tokens: Either an iterable of word strings or a raw text string.
                A raw string is split on whitespace first; iterating it
                directly would yield single characters, not words.

        Returns:
            A dict mapping every distinct word to ``True``.
        """
        if isinstance(tokens, str):
            tokens = tokens.split()
        return {word: True for word in tokens}

    def train(self, training_data: TrainingData) -> Resource:
        """Train the classifier from the training data and persist it.

        The i-th non-empty intent example text is paired with the i-th line
        of the labels file (e.g. positive / negative / neutral).

        Args:
            training_data: NLU training data whose ``intent_examples`` supply
                the texts.

        Returns:
            The resource under which the classifier was persisted.
        """
        texts = [
            example.get(TEXT)
            for example in training_data.intent_examples
            if example.get(TEXT)
        ]

        labels_file = self._config.get("labels_file", "labels.txt")
        with open(labels_file, "r", encoding="utf-8") as f:
            labels = f.read().splitlines()

        if len(labels) != len(texts):
            # zip() below stops at the shorter sequence; make the otherwise
            # silent truncation (and likely misalignment) visible.
            logger.warning(
                "Sentiment labels file '%s' has %d labels but there are %d "
                "training examples; only the first %d pairs are used.",
                labels_file,
                len(labels),
                len(texts),
                min(len(labels), len(texts)),
            )

        labeled_data = [
            (self.preprocessing(text), label) for text, label in zip(texts, labels)
        ]

        if labeled_data:
            self.clf = NaiveBayesClassifier.train(labeled_data)
        else:
            logger.warning(
                "No labeled sentiment examples available; "
                "the sentiment classifier was not trained."
            )

        self.persist()
        return self._resource

    def convert_to_rasa(self, value, confidence):
        """Convert model output into the Rasa NLU compatible entity format.

        Args:
            value: Predicted sentiment label.
            confidence: Probability the classifier assigned to ``value``.

        Returns:
            An entity dict with the keys ``value``, ``confidence``,
            ``entity`` and ``extractor``.
        """
        return {
            "value": value,
            "confidence": confidence,
            "entity": "sentiment",
            "extractor": "sentiment_extractor",
        }

    @classmethod
    def create(
        cls,
        config: Dict[Text, Any],
        model_storage: ModelStorage,
        resource: Resource,
        execution_context: ExecutionContext,
    ) -> GraphComponent:
        """Create a new, untrained component named after the graph node."""
        logger.debug("Creating %s with model storage %s", cls.__name__, model_storage)
        return cls(config, execution_context.node_name, model_storage, resource)

    def process(self, messages: List[Message]) -> List[Message]:
        """Attach a sentiment entity to every message that has text.

        Messages are returned unchanged when the component has no trained
        classifier. Entities already present on a message are kept; the
        sentiment entity is appended to them.

        Args:
            messages: Messages to annotate in place.

        Returns:
            The same list of messages.
        """
        if self.clf is None:
            # Component is either not trained or didn't receive enough
            # training data.
            return messages

        for message in messages:
            text = message.get(TEXT)
            if not text:
                continue

            distribution = self.clf.prob_classify(self.preprocessing(text))
            sentiment = distribution.max()
            entity = self.convert_to_rasa(sentiment, distribution.prob(sentiment))

            # Append rather than overwrite so entities set by other
            # extractors survive.
            existing = message.get("entities") or []
            message.set("entities", list(existing) + [entity], add_to_output=True)

        return messages

    def persist(self) -> None:
        """Persist the classifier into the model storage.

        The file is named after the resource so that it matches the name
        `load` looks for.
        """
        with self._model_storage.write_to(self._resource) as model_dir:
            dump(self.clf, model_dir / f"{self._resource.name}.joblib")

    @classmethod
    def load(
        cls,
        config: Dict[Text, Any],
        model_storage: ModelStorage,
        resource: Resource,
        execution_context: ExecutionContext,
    ) -> GraphComponent:
        """Load the trained component from the model storage.

        Falls back to an untrained component when nothing was persisted for
        ``resource``.
        """
        component = cls(config, execution_context.node_name, model_storage, resource)
        try:
            with model_storage.read_from(resource) as model_dir:
                # NOTE: joblib files are pickle-based; only load models that
                # come from a trusted model storage.
                component.clf = load(model_dir / f"{resource.name}.joblib")
        except (ValueError, FileNotFoundError):
            # NOTE(review): ValueError is presumably what the model storage
            # raises for an unknown resource - confirm against the Rasa docs.
            logger.debug(
                "Could not load a persisted classifier for resource '%s'; "
                "using an untrained %s.",
                resource.name,
                cls.__name__,
            )
        return component

    def process_training_data(self, training_data: TrainingData) -> TrainingData:
        """Return the training data unchanged.

        The data must be handed back (not ``None``) so that anything
        consuming this method's result still receives the training data.
        """
        return training_data

    @classmethod
    def validate_config(cls, config: Dict[Text, Any]) -> None:
        """Validates that the component is configured properly."""
        pass