zentity-io / zentity

Entity resolution for Elasticsearch.

Home Page: https://zentity.io

Geek Repo:Geek Repo

Github PK Tool:Github PK Tool

_attributes not populated and hops not executed

licenziato opened this issue · comments

commented

We are trying to use Zentity 1.5.1 with Elasticsearch 7.3.2 for a pilot project. We are not able to get a chain of resolvers working, most probably because of the problem reported in the title.
Here is the index mapping of the data:

{
    "obj-person": {
        "aliases": {},
        "mappings": {
            "dynamic": "strict",
            "properties": {
                "completeName": {
                    "type": "text",
                    "fields": {
                        "phonetic": {
                            "type": "text",
                            "analyzer": "phonetic_analyzer"
                        }
                    }
                },
                "entry": {
                    "properties": {
                        "createdBy": {
                            "type": "keyword"
                        },
                        "createdDate": {
                            "type": "date"
                        },
                        "infoObject": {
                            "properties": {
                                "dateOfBirth": {
                                    "properties": {
                                        "originalValue": {
                                            "type": "text",
                                            "copy_to": [
                                                "search"
                                            ]
                                        },
                                        "value": {
                                            "type": "text",
                                            "copy_to": [
                                                "search"
                                            ]
                                        }
                                    }
                                },
                                "firstName": {
                                    "properties": {
                                        "originalValue": {
                                            "type": "text",
                                            "copy_to": [
                                                "search"
                                            ]
                                        },
                                        "value": {
                                            "type": "text",
                                            "fields": {
                                                "phonetic": {
                                                    "type": "text",
                                                    "analyzer": "phonetic_analyzer"
                                                }
                                            },
                                            "copy_to": [
                                                "search",
                                                "completeName"
                                            ]
                                        }
                                    }
                                },
                                "lastName": {
                                    "properties": {
                                        "originalValue": {
                                            "type": "text",
                                            "copy_to": [
                                                "search"
                                            ]
                                        },
                                        "value": {
                                            "type": "text",
                                            "fields": {
                                                "phonetic": {
                                                    "type": "text",
                                                    "analyzer": "phonetic_analyzer"
                                                }
                                            },
                                            "copy_to": [
                                                "search",
                                                "completeName"
                                            ]
                                        }
                                    }
                                }
                            }
                        }
                    }
                },
                "search": {
                    "type": "text",
                    "fields": {
                        "graphically_similar": {
                            "type": "text",
                            "analyzer": "normalize_graphically_similar_analyzer"
                        },
                        "normalized": {
                            "type": "text",
                            "analyzer": "normalize_alphanum_analyzer"
                        },
                        "phonetic": {
                            "type": "text",
                            "analyzer": "phonetic_analyzer"
                        }
                    }
                },
                "search_sensitive": {
                    "type": "text"
                },
                "type": {
                    "type": "keyword"
                }
            }
        },
        "settings": {
            "index": {
                "number_of_shards": "1",
                "auto_expand_replicas": "1-5",
                "provided_name": "obj-person",
                "creation_date": "1582637094035",
                "analysis": {
                    "filter": {
                        "phonetic_filter": {
                            "replace": "true",
                            "type": "phonetic",
                            "encoder": "double_metaphone"
                        }
                    },
                    "analyzer": {
                        "phonetic_analyzer": {
                            "filter": [
                                "phonetic_filter"
                            ],
                            "tokenizer": "standard"
                        },
                        "normalize_graphically_similar_analyzer": {
                            "filter": [
                                "uppercase"
                            ],
                            "char_filter": [
                                "strip_special_chars",
                                "replace_graphically_similar"
                            ],
                            "type": "custom",
                            "tokenizer": "keyword"
                        },
                        "normalize_alphanum_analyzer": {
                            "filter": [
                                "uppercase",
                                "reverse"
                            ],
                            "char_filter": "strip_special_chars",
                            "type": "custom",
                            "tokenizer": "keyword"
                        }
                    },
                    "char_filter": {
                        "replace_graphically_similar": {
                            "type": "mapping",
                            "mappings": [
                                "O => 0",
                                "D => 0",
                                "I => 1",
                                "B => 8",
                                "S => 5",
                                "Z => 2",
                                "G => 6",
                                "E => 3",
                                "o => 0",
                                "d => 0",
                                "i => 1",
                                "b => 8",
                                "s => 5",
                                "z => 2",
                                "g => 6",
                                "e => 3"
                            ]
                        },
                        "strip_special_chars": {
                            "pattern": "[^\\w]",
                            "type": "pattern_replace",
                            "replacement": ""
                        }
                    }
                },
                "number_of_replicas": "1",
                "uuid": "JFGNOU6xR4i_BHM8e0nB5Q",
                "version": {
                    "created": "7030199"
                }
            }
        }
    }
}

We created a zentity model like this:

PUT _zentity/models/zentity_test_resolution_person 
{
    "attributes" : {
      "first_name" : {
        "type" : "string"
      },
      "last_name" : {
        "type" : "string"
      },
      "dob" : {
        "type" : "string"
      }
    },
    "resolvers" : {
      "name_only" : {
        "attributes" : [
          "first_name",
          "last_name"
        ]
      },
      "dob" : {
        "attributes" : [
          "dob"
        ]
      }
    },
    "matchers" : {
      "simple" : {
        "clause" : {
          "match" : {
            "{{ field }}" : "{{ value }}"
          }
        }
      },
      "fuzzy" : {
        "clause" : {
          "match" : {
            "{{ field }}" : {
              "query" : "{{ value }}",
              "fuzziness" : "{{ params.fuzziness }}"
            }
          }
        },
        "params" : {
          "fuzziness" : "auto"
        }
      }
    },
    "indices" : {
      "obj-person" : {
        "fields" : {
          "entry.infoObject.firstName.value" : {
            "attribute" : "first_name",
            "matcher" : "fuzzy"
          },
          "entry.infoObject.lastName.value" : {
            "attribute" : "last_name",
            "matcher" : "fuzzy"
          },
          "entry.infoObject.dateOfBirth.value" : {
            "attribute" : "dob",
            "matcher" : "simple"
          }
        }
      }
    }
}

We are using three objects whose core data is:
Person1:

"firstName" : {
  "value" : "Nolan"
},
"lastName" : {
  "value" : "Hendricks"
},
"dateOfBirth" : {
  "value" : "633-9242"
}

Person2:

"firstName" : {
  "value" : "Nolan"
},
"lastName" : {
  "value" : "Hendricks"
},
"dateOfBirth" : {
  "value" : "677-9999"
}

Person3:

"firstName" : {
  "value" : "Noln"
},
"lastName" : {
  "value" : "Hendricks"
},
"dateOfBirth" : {
  "value" : "677-9999"
}

If we execute this resolution:

POST _zentity/resolution/zentity_test_resolution_person?_source=false&_explanation=false
{
  "attributes": {
    "first_name": {
      "values":  ["Nolan"],
      "params": {
        "fuzziness": "0"
      }
    },
    "last_name": ["Hendricks"]
  }
}

the result is this:

{
  "took" : 2,
  "hits" : {
    "total" : 2,
    "hits" : [ {
      "_index" : "obj-person",
      "_type" : "_doc",
      "_id" : "2D6F8CCF-227B-4FBF-A749-16C098BB0C0A",
      "_hop" : 0,
      "_query" : 0,
      "_attributes" : {
        "dob" : [ ],
        "first_name" : [ ],
        "last_name" : [ ]
      }
    }, {
      "_index" : "obj-person",
      "_type" : "_doc",
      "_id" : "1CE639AD-B3FD-4FAB-9D9B-A469DE75C943",
      "_hop" : 0,
      "_query" : 0,
      "_attributes" : {
        "dob" : [ ],
        "first_name" : [ ],
        "last_name" : [ ]
      }
    } ]
  }
}

As you can see, the _attributes are not populated at all and not all of the hops have been performed: I would expect to see the third result, matched on the "dob" field.
Can you point me in the right direction?

@licenziato I'm sure this response comes far too late for your needs and I apologize for that. I'll answer it anyway for the record.

I wasn't able to reproduce your issue. I reproduced your setup (zentity 1.5.1, Elasticsearch 7.3.2) and configurations, and my resolution job included the third document which was matched by the "dob" resolver (see below). If I had to guess, the issue may have been rooted in the mapping, data, or other configuration in your environment.

POST _zentity/resolution/zentity_test_resolution_person?_source=false&_explanation=true
{
  "took" : 65,
  "hits" : {
    "total" : 3,
    "hits" : [ {
      "_index" : "obj-person",
      "_type" : "_doc",
      "_id" : "1",
      "_hop" : 0,
      "_query" : 0,
      "_attributes" : {
        "dob" : [ "633-9242" ],
        "first_name" : [ "Nolan" ],
        "last_name" : [ "Hendricks" ]
      },
      "_explanation" : {
        "resolvers" : {
          "name_only" : {
            "attributes" : [ "first_name", "last_name" ]
          }
        },
        "matches" : [ {
          "attribute" : "first_name",
          "target_field" : "entry.infoObject.firstName.value",
          "target_value" : "Nolan",
          "input_value" : "Nolan",
          "input_matcher" : "fuzzy",
          "input_matcher_params" : {
            "fuzziness" : "0"
          }
        }, {
          "attribute" : "last_name",
          "target_field" : "entry.infoObject.lastName.value",
          "target_value" : "Hendricks",
          "input_value" : "Hendricks",
          "input_matcher" : "fuzzy",
          "input_matcher_params" : { }
        } ]
      }
    }, {
      "_index" : "obj-person",
      "_type" : "_doc",
      "_id" : "2",
      "_hop" : 0,
      "_query" : 0,
      "_attributes" : {
        "dob" : [ "677-9999" ],
        "first_name" : [ "Nolan" ],
        "last_name" : [ "Hendricks" ]
      },
      "_explanation" : {
        "resolvers" : {
          "name_only" : {
            "attributes" : [ "first_name", "last_name" ]
          }
        },
        "matches" : [ {
          "attribute" : "first_name",
          "target_field" : "entry.infoObject.firstName.value",
          "target_value" : "Nolan",
          "input_value" : "Nolan",
          "input_matcher" : "fuzzy",
          "input_matcher_params" : {
            "fuzziness" : "0"
          }
        }, {
          "attribute" : "last_name",
          "target_field" : "entry.infoObject.lastName.value",
          "target_value" : "Hendricks",
          "input_value" : "Hendricks",
          "input_matcher" : "fuzzy",
          "input_matcher_params" : { }
        } ]
      }
    }, {
      "_index" : "obj-person",
      "_type" : "_doc",
      "_id" : "3",
      "_hop" : 1,
      "_query" : 0,
      "_attributes" : {
        "dob" : [ "677-9999" ],
        "first_name" : [ "Noln" ],
        "last_name" : [ "Hendricks" ]
      },
      "_explanation" : {
        "resolvers" : {
          "dob" : {
            "attributes" : [ "dob" ]
          }
        },
        "matches" : [ {
          "attribute" : "dob",
          "target_field" : "entry.infoObject.dateOfBirth.value",
          "target_value" : "677-9999",
          "input_value" : "677-9999",
          "input_matcher" : "simple",
          "input_matcher_params" : { }
        }, {
          "attribute" : "last_name",
          "target_field" : "entry.infoObject.lastName.value",
          "target_value" : "Hendricks",
          "input_value" : "Hendricks",
          "input_matcher" : "fuzzy",
          "input_matcher_params" : { }
        } ]
      }
    } ]
  }
}