How to create a custom component for a sentiment analyzer in Rasa 3.x

Question

How to create a custom component for a sentiment analyzer in Rasa 3.x

shreyashgupta68 opened this issue 2 years ago · comments

Hi everyone,

First of all, thanks for this amazing project. I am working on a sentiment analyzer and am having some trouble understanding how custom components work. I need to use a naive Bayes classifier, trained on my NLU training examples, to perform sentiment analysis. Does anyone know how to do this? Below is the custom sentiment analyzer component that I have written for Rasa **3.1.0**:

import logging
from typing import Any, Text, Dict, List

from joblib import dump, load
from nltk.classify import NaiveBayesClassifier

from rasa.engine.recipes.default_recipe import DefaultV1Recipe
from rasa.engine.graph import ExecutionContext, GraphComponent
from rasa.engine.storage.resource import Resource
from rasa.engine.storage.storage import ModelStorage
from rasa.shared.nlu.training_data.training_data import TrainingData
from rasa.shared.nlu.training_data.message import Message
from rasa.shared.nlu.constants import (TEXT)
# Module-level logger named after this module. The dunder form is required:
# a bare `name` is undefined here and raises NameError at import time.
logger = logging.getLogger(__name__)

@DefaultV1Recipe.register(
    DefaultV1Recipe.ComponentType.ENTITY_EXTRACTOR, is_trainable=True
)
class DemoSentiment(GraphComponent):
    """Sentiment analyzer backed by an NLTK naive Bayes classifier.

    The component is trained on the text of the NLU intent examples paired
    with the sentiment labels read from a labels file, and at inference time
    attaches a ``sentiment`` entity (value + confidence) to every message.

    It is registered as an entity extractor because its output is written to
    the message's ``entities``; it does not produce tokens.
    """

    # Legacy (Rasa 2.x-style) metadata kept for backward compatibility.
    # NOTE(review): presumably unused by the Rasa 3.x graph - confirm before
    # removing.
    name = "sentiment"
    provides = ["entities"]
    requires = ["tokens"]
    defaults = {}
    language_list = ["en"]

    @staticmethod
    def get_default_config() -> Dict[Text, Any]:
        """Return the default configuration of the component.

        ``labels_file`` is the path of a text file holding one sentiment
        label per line (e.g. positive, negative, neutral), in the same order
        as the training examples.
        """
        return {"labels_file": "labels.txt"}

    def __init__(
        self,
        config: Dict[Text, Any],
        name: Text,
        model_storage: ModelStorage,
        resource: Resource,
    ) -> None:
        """Create an untrained component.

        Args:
            config: Component configuration (see ``get_default_config``).
            name: Name of the graph node running this component.
            model_storage: Storage used to persist the trained classifier.
            resource: Resource locator of this component inside the storage.
        """
        self.name = name
        self._config = config or {}

        # Stays None until `train` or `load` provides a classifier, so that
        # `process` can detect an untrained component instead of failing
        # with an AttributeError.
        self.clf = None

        # We need to use these later when saving the trained component.
        self._model_storage = model_storage
        self._resource = resource

    @classmethod
    def create(
        cls,
        config: Dict[Text, Any],
        model_storage: ModelStorage,
        resource: Resource,
        execution_context: ExecutionContext,
    ) -> GraphComponent:
        """Create a new, untrained component."""
        logger.debug("Creating %s with model storage %s", cls.__name__, model_storage)
        return cls(config, execution_context.node_name, model_storage, resource)

    @staticmethod
    def _tokenize(text: Text) -> List[Text]:
        """Split a text into whitespace-separated words."""
        return text.split()

    def preprocessing(self, tokens):
        """Create bag-of-words representation of the training examples.

        Args:
            tokens: Iterable of words.

        Returns:
            A feature dict mapping every distinct word to ``True``.
        """
        return {word: True for word in tokens}

    def train(self, training_data: TrainingData) -> Resource:
        """Train the classifier from the training data and persist it.

        Args:
            training_data: NLU training data; only intent examples with a
                non-empty text are used.

        Returns:
            The resource under which the trained classifier was persisted.

        Raises:
            ValueError: If the number of labels in the labels file differs
                from the number of training texts (the pairs would otherwise
                be silently misaligned/truncated).
        """
        texts = [e.get(TEXT) for e in training_data.intent_examples if e.get(TEXT)]
        if not texts:
            logger.warning(
                "No training examples with text found; '%s' stays untrained.",
                self.name,
            )
            return self._resource

        labels_file = self._config.get("labels_file", "labels.txt")
        with open(labels_file, "r", encoding="utf-8") as f:
            labels = f.read().splitlines()

        if len(texts) != len(labels):
            raise ValueError(
                f"Found {len(texts)} training texts but {len(labels)} labels "
                f"in '{labels_file}'; there must be exactly one label per "
                f"training example."
            )

        labeled_data = [
            (self.preprocessing(self._tokenize(text)), label)
            for text, label in zip(texts, labels)
        ]
        self.clf = NaiveBayesClassifier.train(labeled_data)
        self.persist()
        return self._resource

    def convert_to_rasa(self, value, confidence):
        """Convert model output into the Rasa NLU compatible output format."""
        return {
            "value": value,
            "confidence": confidence,
            "entity": "sentiment",
            "extractor": "sentiment_extractor",
        }

    def process(self, messages: List[Message]) -> List[Message]:
        """Attach a sentiment entity to every message that has a text.

        Messages are returned unchanged when the component has no trained
        classifier. Entities already present on a message are kept; the
        sentiment entity is appended to them.
        """
        if self.clf is None:
            # Component is either not trained or didn't receive enough
            # training data.
            return messages

        for message in messages:
            text = message.get(TEXT)
            if not text:
                continue

            features = self.preprocessing(self._tokenize(text))
            distribution = self.clf.prob_classify(features)
            sentiment = distribution.max()
            entity = self.convert_to_rasa(sentiment, distribution.prob(sentiment))

            # Append instead of overwrite so entities found by other
            # extractors in the pipeline are not lost.
            entities = list(message.get("entities") or [])
            entities.append(entity)
            message.set("entities", entities, add_to_output=True)
        return messages

    def persist(self) -> None:
        """Persist the trained classifier into the model storage.

        The file is named after the resource (not the node name) so that
        `load`, which only knows the resource, finds it again.
        """
        with self._model_storage.write_to(self._resource) as model_dir:
            dump(self.clf, model_dir / f"{self._resource.name}.joblib")

    @classmethod
    def load(
        cls,
        config: Dict[Text, Any],
        model_storage: ModelStorage,
        resource: Resource,
        execution_context: ExecutionContext,
        **kwargs: Any,
    ) -> GraphComponent:
        """Load the trained component from the model storage.

        Returns an untrained component when no persisted classifier can be
        read for the resource.
        """
        component = cls(config, execution_context.node_name, model_storage, resource)
        try:
            with model_storage.read_from(resource) as model_dir:
                # `load` here is the module-level joblib function, not this
                # classmethod (class scope is not visible in method bodies).
                # NOTE: joblib deserialization is pickle-based; only load
                # model archives from a trusted source.
                component.clf = load(model_dir / f"{resource.name}.joblib")
        except (ValueError, FileNotFoundError):
            logger.debug(
                "Couldn't load persisted classifier for '%s'; "
                "using an untrained component.",
                cls.__name__,
            )
        return component

    def process_training_data(self, training_data: TrainingData) -> TrainingData:
        """Return the training data unchanged.

        Returning the data (rather than ``None``) keeps any caller that
        consumes the result working.
        """
        return training_data

    @classmethod
    def validate_config(cls, config: Dict[Text, Any]) -> None:
        """Validates that the component is configured properly."""