Classify Unicode emoji and Gemoji shortcodes as EmoticonNodes.
Implemented by retext-emoji, but separated for use by standalone (non-retext) processing.
npm:
npm install nlcst-emoji-modifier
var modifier = require('nlcst-emoji-modifier');
var inspect = require('unist-util-inspect');
var english = require('parse-english')();
english.useFirst('tokenizeSentence', modifier);
console.log(inspect(english.parse('It\'s raining :cat:s and :dog:s.')));
Yields:
RootNode[1]
└─ ParagraphNode[1]
   └─ SentenceNode[11]
      ├─ WordNode[3]
      │  ├─ TextNode: "It"
      │  ├─ PunctuationNode: "'"
      │  └─ TextNode: "s"
      ├─ WhiteSpaceNode: " "
      ├─ WordNode[1]
      │  └─ TextNode: "raining"
      ├─ WhiteSpaceNode: " "
      ├─ EmoticonNode: ":cat:"
      ├─ WordNode[1]
      │  └─ TextNode: "s"
      ├─ WhiteSpaceNode: " "
      ├─ WordNode[1]
      │  └─ TextNode: "and"
      ├─ WhiteSpaceNode: " "
      ├─ EmoticonNode: ":dog:"
      └─ WordNode[2]
         ├─ TextNode: "s"
         └─ PunctuationNode: "."
Merge emoji and gemoji into a new EmoticonNode.
paragraph (NLCSTParagraphNode).